Example no. 1
0
def train(args, model, device, train_loader, test_loader, optimizer):
    """Two-phase training: regularized pre-training followed by ADMM epochs."""
    # Phase 1: standard pre-training with the regularized NLL objective.
    for pre_epoch in range(args.num_pre_epochs):
        print('Pre epoch: {}'.format(pre_epoch + 1))
        model.train()
        for inputs, labels in tqdm(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            preds = model(inputs)
            pre_loss = regularized_nll_loss(args, model, preds, labels)
            pre_loss.backward()
            optimizer.step()
        test(args, model, device, test_loader)

    # Phase 2: ADMM — SGD on the augmented loss, then X/Z/U updates per epoch.
    Z, U = initialize_Z_and_U(model)
    for epoch in range(args.num_epochs):
        model.train()
        print('Epoch: {}'.format(epoch + 1))
        for inputs, labels in tqdm(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            preds = model(inputs)
            step_loss = admm_loss(args, device, model, Z, U, preds, labels)
            step_loss.backward()
            optimizer.step()
        X = update_X(model)
        # The L1 regularizer has its own proximal update for Z.
        if args.l1:
            Z = update_Z_l1(X, U, args)
        else:
            Z = update_Z(X, U, args)
        U = update_U(U, X, Z)
        print_convergence(model, X, Z)
        test(args, model, device, test_loader)
Example no. 2
0
def train(args, model, device, train_loader, test_loader, optimizer):
    """Run ADMM training epochs, logging wall-clock time for the SGD pass
    and the ADMM step of every epoch, plus the total training time."""
    train_start = time.time()
    Z, U = initialize_Z_and_U(model, device)
    for epoch in range(args.num_epochs):
        print('Epoch: {}'.format(epoch + 1))
        model.train()
        epoch_start = time.time()
        for batch, labels in train_loader:
            batch, labels = batch.to(device), labels.to(device)
            optimizer.zero_grad()
            logits = model(batch)
            batch_loss = admm_loss(args, device, model, Z, U, logits, labels)
            batch_loss.backward()
            optimizer.step()
        epoch_end = time.time()
        print("train epoch time cost: {}".format(epoch_end - epoch_start))

        # ADMM step: recompute X from the weights, then update Z and U.
        admm_start = time.time()
        X = update_X(model, device)
        if args.l1:
            Z = update_Z_l1(X, U, args)
        else:
            Z = update_Z(X, U, args, device)
        U = update_U(U, X, Z)
        admm_end = time.time()
        print("admm step time cost: {}".format(admm_end - admm_start))

        print_convergence(model, X, Z)
        test(args, model, device, test_loader)
    train_end = time.time()
    print("train total time cost: {}".format(train_end - train_start))
def train_one_epoch(model,
                    criterion,
                    admm_optimizer,
                    data_loader,
                    device,
                    epoch,
                    print_freq,
                    layer_names,
                    percent,
                    pattern,
                    Z,
                    U,
                    arg_rho,
                    apex=False):
    """Train for one epoch with the ADMM-augmented loss, then run one
    ADMM update (X, Z, U) over ``layer_names``.

    Args:
        model: network to train; left in train mode.
        criterion: base loss forwarded to ``utils.admm_loss``.
        admm_optimizer: optimizer stepped once per batch.
        data_loader: iterable of ``(image, target)`` batches.
        device: device the batches are moved to.
        epoch: epoch index, used only in the log header.
        print_freq: logging frequency for the metric logger.
        layer_names: layers subject to the ADMM penalty and updates.
        percent: unused here; kept for interface compatibility with callers.
        pattern: pruning pattern forwarded to ``utils.update_Z_Pattern``.
        Z: ADMM auxiliary variables; an updated copy is returned.
        U: ADMM dual variables; an updated copy is returned.
        arg_rho: ADMM penalty coefficient passed to ``utils.admm_loss``.
        apex: if True, scale the loss via NVIDIA apex ``amp`` before backward.

    Returns:
        Tuple ``(Z, U)`` of the updated ADMM variables.
    """
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr',
                            utils.SmoothedValue(window_size=1, fmt='{value}'))
    metric_logger.add_meter('img/s',
                            utils.SmoothedValue(window_size=10, fmt='{value}'))

    header = 'Epoch: [{}]'.format(epoch)

    for image, target in metric_logger.log_every(data_loader, print_freq,
                                                 header):
        start_time = time.time()
        image, target = image.to(device), target.to(device)
        output = model(image)

        loss = utils.admm_loss(device, model, layer_names, criterion, Z, U,
                               output, target, arg_rho)

        admm_optimizer.zero_grad()
        if apex:
            # apex amp requires backpropagating through the scaled loss.
            with amp.scale_loss(loss, admm_optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        admm_optimizer.step()

        acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))
        batch_size = image.shape[0]
        metric_logger.update(loss=loss.item(),
                             lr=admm_optimizer.param_groups[0]["lr"])
        metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
        metric_logger.meters['acc5'].update(acc5.item(), n=batch_size)
        metric_logger.meters['img/s'].update(batch_size /
                                             (time.time() - start_time))

    print("=" * 10, "Entering ADMM Optimization")
    X = utils.update_X(model, layer_names)
    # update_Z_Pattern also returns the per-layer pattern, which is unused.
    Z, _ = utils.update_Z_Pattern(X, U, layer_names, pattern)
    U = utils.update_U(U, X, Z, layer_names)

    return Z, U
def train(args, model, device, train_loader, test_loader, optimizer):
    """Pre-train, then run ADMM epochs with a configurable Z-update rule.

    The Z update is chosen by the regularizer flags on ``args``
    (``l1`` / ``l0`` / ``SCAD`` / ``rscad``; plain ``update_Z`` otherwise).

    Returns:
        ``A``: numpy array of shape ``(args.idx, args.num_epochs)`` filled
        with per-index convergence values from ``print_convergence`` for
        each epoch (left as zeros when ``args.test_lamda`` is set).
    """
    loss_iter = []
    for epoch in range(args.num_pre_epochs):

        print('Pre epoch: {}'.format(epoch + 1))
        model.train()
        for batch_idx, (data, target) in enumerate(tqdm(train_loader)):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = regularized_nll_loss(args, model, output, target)
            # Record the scalar only: appending the tensor itself would keep
            # every batch's autograd graph alive and leak memory.
            loss_iter.append(loss.item())
            loss.backward()
            optimizer.step()
        test(args, model, device, test_loader)

    Z, U = initialize_Z_and_U(model)  # initialize Z, U
    A = np.zeros((args.idx, args.num_epochs))
    for epoch in range(args.num_epochs):
        model.train()
        print('Epoch: {}'.format(epoch + 1))
        for batch_idx, (data, target) in enumerate(tqdm(train_loader)):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = admm_loss(args, device, model, Z, U, output, target)
            loss.backward()
            optimizer.step()
        X = update_X(model)  # update X
        # Select the Z update according to the configured regularizer.
        if args.l1:
            Z = update_Z_l1(X, U, args)
        elif args.l0:
            Z = update_Z_l0(X, U, args)
        elif args.SCAD:
            Z = update_Z_SCAD(X, U, args)
        elif args.rscad:
            print('use rscad updata z')
            Z = updata_Z_Prox_glarho(X, U, args)
        else:
            Z = update_Z(X, U, args)
        U = update_U(U, X, Z)

        if not args.test_lamda:
            # Collect convergence metrics for this epoch into column `epoch`.
            a = print_convergence(model, X, Z)
            for i in range(args.idx):
                A[i, epoch] = a[i]

        test(args, model, device, test_loader)
    return A