def train_model(wrapped_model, model, model_path, train_loader, test_loader, init_lr, epochs, args):
    """Train with the refinery loss and checkpoint ``model`` after every epoch.

    Each epoch runs ``util.train`` on ``wrapped_model`` and ``util.validate``
    on ``model``, prints the results, and saves ``model`` to ``model_path``.
    Whenever the test accuracy beats the best value seen so far, a second
    copy is written next to it with a ``.best.pth`` suffix.

    Only the ``'lr'`` series of ``model.stats`` is appended to here.

    Args:
        wrapped_model: Object handed to ``util.train`` for the training pass.
        model: Model that is validated, annotated with ``stats`` and
            ``optimizer`` attributes, and serialized with ``torch.save``.
        model_path: Destination of the per-epoch checkpoint.
        train_loader: Passed through to ``util.train``.
        test_loader: Passed through to ``util.validate``.
        init_lr: Learning rate given to the RMSprop optimizer.
        epochs: Last epoch number to run (inclusive, epochs are 1-based).
        args: Passed through to ``util.train`` / ``util.validate``.
    """
    # Local import keeps this block self-contained.
    import os

    train_loss_f = refinery_loss.RefineryLoss()
    val_loss_f = nn.CrossEntropyLoss()
    # splitext only strips a real trailing extension, so paths without one
    # (or with dots in a directory name) still yield a sibling file.
    best_model_path = os.path.splitext(model_path)[0] + '.best.pth'

    # tracking stats
    if not hasattr(model, 'stats'):
        model.stats = {'train_loss': [], 'test_acc': [], 'test_loss': [],
                       'weight': [], 'lr': [], 'macs': [], 'efficiency': []}
        start_epoch = 1
        best_acc = 0
    else:
        # NOTE(review): if one 'test_loss' entry is recorded per finished
        # epoch, this resumes by repeating the last recorded epoch rather
        # than starting the next one -- confirm the intended behaviour.
        start_epoch = max(1, len(model.stats['test_loss']))
        # Tolerate an empty history and entries that are plain numbers
        # instead of objects exposing ``.item()``.
        best_acc = max(model.stats['test_acc'], default=0)
        if hasattr(best_acc, 'item'):
            best_acc = best_acc.item()

    curr_weights, _ = util.num_nonzeros(model)

    # optimizer
    optimizer = optim.RMSprop(util.group_weight(model), lr=init_lr, momentum=0.9, alpha=0.9,
                              weight_decay=0, eps=1.0)
    print("Optimizer:")
    print(optimizer)

    # training stage
    for epoch in range(start_epoch, epochs + 1):
        print('[Epoch {}]'.format(epoch))
        # Report the learning rate of the first parameter group.
        lr = optimizer.param_groups[0]['lr']

        # util.train / util.validate receive a 0-based epoch index.
        train_loss = util.train(train_loader, wrapped_model, train_loss_f, optimizer, epoch-1, args)
        test_loss, test_acc = util.validate(test_loader, model, val_loss_f, epoch-1, args)

        print('LR        :: {}'.format(lr))
        print('Train Loss:: {}'.format(train_loss))
        print('Test  Loss:: {}'.format(test_loss))
        print('Test  Acc.:: {}'.format(test_acc))
        print('Nonzeros  :: {}'.format(curr_weights))
        print('')
        print('')
        model.stats['lr'].append(lr)
        # Stash the optimizer state on the model so it is saved with it.
        model.optimizer = optimizer.state_dict()

        # Serialize from the CPU, then move the model back to the GPU.
        model.cpu()
        torch.save(model, model_path)
        if test_acc > best_acc:
            print('New best model found')
            torch.save(model, best_model_path)
            best_acc = test_acc

        model.cuda()
 def _cur_tag_X_and_y(self, tag, test_random=False, all_ground_truth_binary=True):
     """
     Build the design matrix and response vector for ``tag``.

     The results are stored in ``self.X[tag]`` and ``self.y[tag]``; nothing is
     returned.  The tag is dropped via ``self._remove_tag`` (and nothing is
     stored) when its ground truth has fewer than ``self.min_tag_count``
     nonzero entries, or when the design matrix cannot be constructed.

     Column values are appended to one flat list in this order: an optional
     intercept, optional standardized scrobble counts, then one standardized
     block for every other entry of ``self.sorted_sources[tag]``.  A source
     named ``<name>_interaction`` is the pointwise product of feature
     ``<name>`` with the unstandardized scrobble vector.

     tag -- key into the per-tag dictionaries held on ``self``.
     test_random -- if true (and ``self.production_run`` is false), replace
         X and y with synthetic Gaussian data for testing.
     all_ground_truth_binary -- if true, or if ``self.regtype`` is one of the
         logistic types, threshold y to 0.0 / 1.0.
     """
     sorted_sources = self.sorted_sources[tag]
     cur_song_list = self.song_lists[tag]
     features_dict = self.features[tag]
     y = rc.matrix(self._dict_to_vec(self.ground_truth[tag], cur_song_list))
     if util.num_nonzeros(y) < self.min_tag_count:
         # Too few positive examples to fit this tag.
         self._remove_tag(tag, verbosity=0)
         return
     # Possibly add intercept.
     ncol = 0
     x_vec = []
     if "intercept" in sorted_sources:
         ncol += 1
         x_vec.extend([1.0 for _ in cur_song_list])
     # Scrobble counts are always computed because interaction terms need
     # them even when "scrobble" itself is not a source.
     scrobble_vec = numpy.array([self.log_scrobble_counts.get(songid, self.avg_scrobble)
                                 for songid in cur_song_list])
     if "scrobble" in sorted_sources:
         ncol += 1
         x_vec.extend(self._standardize(scrobble_vec))
     # Add remaining features.
     for source in sorted_sources:
         if source in ("intercept", "scrobble"):
             # Already handled above.
             continue
         if source.endswith("_interaction"):
             main_source = util.remove_trailing_string(source, "_interaction")
             main_vec = self._dict_to_vec(features_dict.get(main_source, None), cur_song_list)
             feature_vec = numpy.multiply(main_vec, scrobble_vec)  # pointwise product
         else:
             feature_vec = self._dict_to_vec(features_dict.get(source, None), cur_song_list)
         x_vec.extend(self._standardize(feature_vec))
         ncol += 1
     try:
         # NOTE(review): presumably rc.matrix fills the matrix column by
         # column from the flat list -- confirm against the rc module.
         X = rc.matrix(x_vec, ncol=ncol)
     except Exception:
         # Best-effort fallback kept from the original "temporary fix": drop
         # the tag when the matrix cannot be built.  The interactive debugger
         # breakpoint was removed so unattended runs are not halted, and
         # KeyboardInterrupt / SystemExit are no longer swallowed.
         self._remove_tag(tag)
         return
     if test_random and not self.production_run:
         # Erase all of the above and use random numbers for testing.
         # NOTE(review): assumes len(X) is the number of songs -- confirm for
         # the matrix type returned by rc.matrix.
         n_songs = len(X)
         X = numpy.random.standard_normal((n_songs, ncol))
         if ncol == 1:
             y = 3 * X[:, 0] + 0.5 * numpy.random.standard_normal((1, n_songs))
         else:
             y = 3 * X[:, 0] + X[:, 1] + 0.5 * numpy.random.standard_normal((1, n_songs))
         y = y.transpose()
     if all_ground_truth_binary or self.regtype in ("Independent Logistic", "Hierarchical Logistic"):
         # Convert y to 0's and 1's.
         y = 1.0 * (numpy.array(y) > 0)  # multiply by 1.0 to make Float
     self.X[tag] = X
     self.y[tag] = y
Example #3
0
def train_model(wrapped_model, model, train_loss_f, model_path, train_loader,
                test_loader, init_lr, epochs, args):
    """Train while pruning in fixed steps, checkpointing ``model`` every epoch.

    Every ``prune_cycle`` (8) epochs, as long as the cumulative prune
    fraction is below ``max_prune`` (0.7), the fraction grows by 0.1 and the
    model is pruned with ``util.prune`` and re-packed with
    ``packing.pack_model(model, args.gamma)``.  Each epoch then runs
    ``util.train`` on ``wrapped_model`` and ``util.validate`` on ``model``
    and saves ``model`` to ``model_path``.  A ``.best.pth`` copy is written
    only once the cumulative prune fraction has reached ``max_prune`` and
    the test accuracy beats the best value seen so far.

    Only the ``'lr'`` series of ``model.stats`` is appended to here.

    Args:
        wrapped_model: Object handed to ``util.train`` for the training pass.
        model: Model that is pruned, validated, annotated with ``stats`` and
            ``optimizer`` attributes, and serialized with ``torch.save``.
        train_loss_f: Loss passed to ``util.train``.
        model_path: Destination of the per-epoch checkpoint.
        train_loader: Passed through to ``util.train``.
        test_loader: Passed through to ``util.validate``.
        init_lr: Learning rate given to the RMSprop optimizer.
        epochs: Last epoch number to run (inclusive, epochs are 1-based).
        args: Supplies ``gamma`` and is passed through to ``util.train`` /
            ``util.validate``.
    """
    # Local import keeps this block self-contained.
    import os

    val_loss_f = nn.CrossEntropyLoss()
    # splitext only strips a real trailing extension, so paths without one
    # (or with dots in a directory name) still yield a sibling file.
    best_model_path = os.path.splitext(model_path)[0] + '.best.pth'

    # tracking stats
    if not hasattr(model, 'stats'):
        model.stats = {
            'train_loss': [],
            'test_acc': [],
            'test_loss': [],
            'weight': [],
            'lr': [],
            'macs': [],
            'efficiency': []
        }
        start_epoch = 1
    else:
        # NOTE(review): if one 'test_loss' entry is recorded per finished
        # epoch, this resumes by repeating the last recorded epoch -- confirm.
        start_epoch = len(model.stats['test_loss'])

    curr_weights, _ = util.num_nonzeros(model)

    # optimizer
    optimizer = optim.RMSprop(util.group_weight(model),
                              lr=init_lr,
                              momentum=0.9,
                              alpha=0.9,
                              weight_decay=4e-5,
                              eps=1.0)
    print("Optimizer:")
    print(optimizer)
    best_acc = 0

    # Step-wise pruning schedule.
    prune_rate = 0.1   # increment added to the cumulative fraction per step
    prune_total = 0.0  # cumulative fraction handed to util.prune
    prune_cycle = 8    # prune on every epoch divisible by this
    max_prune = 0.7    # stop pruning once the cumulative fraction reaches this

    # pruning stage
    for epoch in range(start_epoch, epochs + 1):
        print('[Epoch {}]'.format(epoch))
        # Report the learning rate of the first parameter group.
        lr = optimizer.param_groups[0]['lr']

        if epoch % prune_cycle == 0 and prune_total < max_prune:
            prune_total += prune_rate
            print('Prune Total: {:2.2f}'.format(100. * prune_total))
            util.prune(model, prune_total)
            packing.pack_model(model, args.gamma)
            # Refresh the nonzero count that is printed below.
            curr_weights, _ = util.num_nonzeros(model)

        # util.train / util.validate receive a 0-based epoch index.
        train_loss = util.train(train_loader, wrapped_model, train_loss_f,
                                optimizer, epoch - 1, args)
        test_loss, test_acc = util.validate(test_loader, model, val_loss_f,
                                            epoch - 1, args)

        print('LR        :: {}'.format(lr))
        print('Train Loss:: {}'.format(train_loss))
        print('Test  Loss:: {}'.format(test_loss))
        print('Test  Acc.:: {}'.format(test_acc))
        print('Nonzeros  :: {}'.format(curr_weights))
        print('')
        print('')
        model.stats['lr'].append(lr)
        # Stash the optimizer state on the model so it is saved with it.
        model.optimizer = optimizer.state_dict()

        # Serialize from the CPU, then move the model back to the GPU.
        model.cpu()
        torch.save(model, model_path)
        # A "best" checkpoint only counts once pruning has finished.
        if test_acc > best_acc and prune_total >= max_prune:
            print('New best model found')
            torch.save(model, best_model_path)
            best_acc = test_acc

        model.cuda()
Example #4
0
    # Load the dataset and its loaders.
    # NOTE(review): the enclosing function header is outside this view; `args`
    # and `model` are expected to be defined before this point.
    data = datasets.get_dataset(args.dataset_root,
                                args.dataset,
                                args.batch_size,
                                args.cuda,
                                args.aug,
                                in_memory=args.in_memory,
                                input_size=args.input_size)
    train_dataset, train_loader, test_dataset, test_loader = data

    # Choose the training wrapper and loss: plain cross-entropy without a
    # teacher, refinery loss with a ResNet50 teacher otherwise.
    # NOTE(review): `is None` is the idiomatic comparison here.
    if args.teacher_path == None:
        wrapped_model = net.WrappedModel(model)
        train_loss_f = nn.CrossEntropyLoss()
    else:
        teacher = ResNet50()
        teacher.load_state_dict(torch.load(args.teacher_path))
        # NOTE(review): the teacher is moved to the GPU unconditionally,
        # whereas `model` below only moves when args.cuda is set.
        teacher.cuda()
        wrapped_model = ModelRefineryWrapper(model, teacher)
        train_loss_f = refinery_loss.RefineryLoss()

    if args.cuda:
        model = model.cuda()

    # Print the model and its nonzero-weight statistics before training.
    print(model)
    print(util.num_nonzeros(model))
    print('Target Nonzeros:', util.target_nonzeros(model))

    # Note the argument order: the loss comes before the save path.
    train_model(wrapped_model, model, train_loss_f, args.save_path,
                train_loader, test_loader, args.lr, args.epochs, args)
Example #5
0
import util
import matplotlib
import matplotlib.pyplot as plt
from config import init_mpl_settings
init_mpl_settings()

# Plot settings and the gamma values of the saved models to compare.
colors = ['r', 'g', 'b']
# NOTE(review): alpha and beta are not used in the visible part of this
# script -- presumably consumed further down; confirm before removing.
alpha = 8
beta = 0.7
gammas = [0.0, 0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
# Collect the recorded training statistics of one saved model per gamma.
gstats = []
for gamma in gammas:
    model_path = 'models/cifar10/cifar10-shift-{}.pth'.format(gamma)
    # Loads the full serialized model object and moves it to the CPU.
    model = torch.load(model_path).cpu()
    # NOTE(review): macs and curr_weights are recomputed here but not used in
    # the visible part of the script; only model.stats is kept.
    macs = np.sum([x * y for x, y in model.packed_layer_size])
    curr_weights, _ = util.num_nonzeros(model)
    gstats.append(model.stats)

# Take the last recorded value of each series for every gamma.
accs, effs, weights = [], [], []
for i, gamma in enumerate(gammas):
    accs.append(gstats[i]['test_acc'][-1])
    effs.append(gstats[i]['efficiency'][-1])
    weights.append(gstats[i]['weight'][-1])

# Primary axis of the figure.
fig, ax1 = plt.subplots()
ax1.set_title('CIFAR-10 VGG-19 (1x1)')
ln1 = ax1.plot(gammas,
               accs,
               '-o',
               color=colors[2],
               linewidth=3,
Example #6
0
def train(model, train_loader, val_loader, args):
    """Train ``model`` with SGD while pruning along a cubic schedule.

    Ten (epoch, rate) pairs are spread between epoch 0 and the midpoint of
    training; the rate rises along a cubic curve to ``max_prune_rate`` (0.8).
    On each scheduled epoch the model is pruned with ``util.prune`` and
    re-packed with ``packing.pack_model(model, args.gamma)``.  On the final
    scheduled epoch ``args.l1_penalty`` is set to 0.  After every epoch the
    model is saved to ``args.save_path``, and to a ``.best.pth`` sibling when
    the test accuracy improves.

    Side effects: replaces ``model.stats``, sets ``model.optimizer`` and
    ``model.epoch``, and mutates ``args.l1_penalty``.

    Args:
        model: Model to train; its ``parameters()`` are optimized.
        train_loader: Passed through to ``util.train``.
        val_loader: Passed through to ``util.validate``.
        args: Supplies ``lr``, ``momentum``, ``weight_decay``, ``epochs``,
            ``gamma``, ``save_path`` and ``l1_penalty``.
    """
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, args.epochs)

    max_prune_rate = 0.8
    final_prune_epoch = int(0.5 * args.epochs)
    num_prune_epochs = 10
    prune_rates = [
        max_prune_rate * (1 - (1 - (i / num_prune_epochs))**3)
        for i in range(num_prune_epochs)
    ]
    prune_rates[-1] = max_prune_rate
    prune_epochs = np.linspace(0, final_prune_epoch,
                               num_prune_epochs).astype('i').tolist()
    print("Pruning Epochs: {}".format(prune_epochs))
    print("Pruning Rates: {}".format(prune_rates))

    # Look the rate up by epoch instead of by a running counter.  The
    # schedule starts at epoch 0 but training starts at epoch 1, so a counter
    # lags one slot behind and never applies max_prune_rate.  When integer
    # rounding maps several slots to the same epoch, the later (larger) rate
    # wins because the rates are non-decreasing.
    prune_schedule = dict(zip(prune_epochs, prune_rates))

    curr_weights, num_weights = util.num_nonzeros(model)
    macs = curr_weights

    model.stats = {
        'train_loss': [],
        'test_acc': [],
        'test_loss': [],
        'weight': [],
        'lr': [],
        'macs': [],
        'efficiency': []
    }
    # Strip only a trailing '.pth'; splitting on the first occurrence would
    # truncate paths that contain '.pth' elsewhere.
    save_path = args.save_path
    if save_path.endswith('.pth'):
        best_path = save_path[:-len('.pth')] + '.best.pth'
    else:
        best_path = save_path + '.best.pth'
    best_test_acc = 0
    for epoch in range(1, args.epochs + 1):
        # NOTE(review): PyTorch >= 1.1 expects scheduler.step() after the
        # optimizer updates; it is kept at the top of the epoch to preserve
        # the existing learning-rate schedule -- confirm against the PyTorch
        # version in use.
        scheduler.step()
        # Report the learning rate of the first parameter group.
        lr = optimizer.param_groups[0]['lr']

        # prune smallest weights up to the rate scheduled for this epoch
        if epoch in prune_schedule:
            util.prune(model, prune_schedule[epoch])
            packing.pack_model(model, args.gamma)
            macs = np.sum([x * y for x, y in model.packed_layer_size])
            curr_weights, num_weights = util.num_nonzeros(model)

        if epoch == prune_epochs[-1]:
            # disable l1 penalty, as target sparsity is reached
            args.l1_penalty = 0

        print('     :: [{}]\tLR {:.4f}\tNonzeros ({}/{})'.format(
            epoch, lr, curr_weights, num_weights))
        train_loss = util.train(train_loader, model, criterion, optimizer,
                                epoch, args)
        test_loss, test_acc = util.validate(val_loader, model, criterion,
                                            epoch, args)

        is_best = test_acc > best_test_acc
        best_test_acc = max(test_acc, best_test_acc)
        model.stats['lr'].append(lr)
        model.stats['macs'].append(macs)
        model.stats['weight'].append(curr_weights)
        # Guard against a zero MAC count instead of dividing by zero.
        efficiency = 100.0 * (curr_weights / macs) if macs else 0.0
        model.stats['efficiency'].append(efficiency)
        # Stash the optimizer state and epoch on the model so they are saved
        # with it.
        model.optimizer = optimizer.state_dict()
        model.epoch = epoch

        # Serialize from the CPU, then move the model back to the GPU.
        model.cpu()
        torch.save(model, args.save_path)
        if is_best:
            torch.save(model, best_path)
        model.cuda()