Example #1
    def train(class_dist_threshold_list):
        G.train()
        F1.train()
        optimizer_g = optim.SGD(params,
                                momentum=0.9,
                                weight_decay=0.0005,
                                nesterov=True)
        optimizer_f = optim.SGD(list(F1.parameters()),
                                lr=1.0,
                                momentum=0.9,
                                weight_decay=0.0005,
                                nesterov=True)

        def zero_grad_all():
            optimizer_g.zero_grad()
            optimizer_f.zero_grad()

        param_lr_g = []
        for param_group in optimizer_g.param_groups:
            param_lr_g.append(param_group["lr"])
        param_lr_f = []
        for param_group in optimizer_f.param_groups:
            param_lr_f.append(param_group["lr"])

        # Select the classification loss
        if args.loss == 'CE':
            criterion = nn.CrossEntropyLoss().to(device)
        elif args.loss == 'FL':
            criterion = FocalLoss(alpha=1, gamma=args.gamma).to(device)
        elif args.loss == 'CBFL':
            # Class-balanced focal loss: weight each class by the inverse of its
            # "effective number" of samples, computed from class_num_list
            beta = args.beta
            effective_num = 1.0 - np.power(beta, class_num_list)
            per_cls_weights = (1.0 - beta) / np.array(effective_num)
            per_cls_weights = per_cls_weights / np.sum(per_cls_weights) * len(
                class_num_list)
            per_cls_weights = torch.FloatTensor(per_cls_weights).to(device)
            criterion = CBFocalLoss(weight=per_cls_weights,
                                    gamma=args.gamma).to(device)
        else:
            raise ValueError('Unrecognized loss: {}'.format(args.loss))

        all_step = args.steps
        data_iter_s = iter(source_loader)
        data_iter_t = iter(target_loader)
        data_iter_t_unl = iter(target_loader_unl)
        len_train_source = len(source_loader)
        len_train_target = len(target_loader)
        len_train_target_semi = len(target_loader_unl)
        best_acc = 0
        counter = 0
        """
        x = torch.load("./freezed_models/alexnet_p2r.ckpt.best.pth.tar")
        G.load_state_dict(x['G_state_dict'])
        F1.load_state_dict(x['F1_state_dict'])
        optimizer_f.load_state_dict(x['optimizer_f'])
        optimizer_g.load_state_dict(x['optimizer_g'])
        """
        reg_weight = args.reg
        for step in range(all_step):
            optimizer_g = inv_lr_scheduler(param_lr_g,
                                           optimizer_g,
                                           step,
                                           init_lr=args.lr)
            optimizer_f = inv_lr_scheduler(param_lr_f,
                                           optimizer_f,
                                           step,
                                           init_lr=args.lr)
            lr = optimizer_f.param_groups[0]['lr']
            # condition for restarting the iteration for each of the data loaders
            if step % len_train_target == 0:
                data_iter_t = iter(target_loader)
            if step % len_train_target_semi == 0:
                data_iter_t_unl = iter(target_loader_unl)
            if step % len_train_source == 0:
                data_iter_s = iter(source_loader)
            data_t = next(data_iter_t)
            data_t_unl = next(data_iter_t_unl)
            data_s = next(data_iter_s)
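            # Copy each fetched batch into the pre-allocated input tensors in place
            # (resize_ + copy_), so the same buffers are reused at every step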
            with torch.no_grad():
                im_data_s.resize_(data_s[0].size()).copy_(data_s[0])
                gt_labels_s.resize_(data_s[1].size()).copy_(data_s[1])
                im_data_t.resize_(data_t[0].size()).copy_(data_t[0])
                gt_labels_t.resize_(data_t[1].size()).copy_(data_t[1])
                im_data_tu.resize_(data_t_unl[0].size()).copy_(data_t_unl[0])

            zero_grad_all()
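            # UDA setting: classify source data only; otherwise (semi-supervised DA)
            # also include the few labeled target examples in the batch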
            if args.uda == 1:
                data = im_data_s
                target = gt_labels_s
            else:
                data = torch.cat((im_data_s, im_data_t), 0)
                target = torch.cat((gt_labels_s, gt_labels_t), 0)
            #print(data.shape)
            output = G(data)
            out1 = F1(output)
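            # Optional attribute regularizer on the classifier's last fc layer
            # (fc3 for resnet34, fc2 otherwise), scaled by reg_weight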
            if args.attribute is not None:
                if args.net == 'resnet34':
                    reg_loss = regularizer(F1.fc3.weight, att)
                    loss = criterion(out1, target) + reg_weight * reg_loss
                else:
                    reg_loss = regularizer(F1.fc2.weight, att)
                    loss = criterion(out1, target) + reg_weight * reg_loss
            else:
                reg_loss = torch.tensor(0)
                loss = criterion(out1, target)

            if args.attribute is not None:
                if step % args.save_interval == 0 and step != 0:
                    reg_weight = 0.5 * reg_weight
                    print("Reduced Reg weight to: ", reg_weight)

            loss.backward(retain_graph=True)
            optimizer_g.step()
            optimizer_f.step()
            zero_grad_all()
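            # Second step: adapt on the unlabeled target batch
            # (ENT: entropy minimization, MME: minimax entropy via adentropy)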
            if not args.method == 'S+T':
                output = G(im_data_tu)
                if args.method == 'ENT':
                    loss_t = entropy(F1, output, args.lamda)
                    #print(loss_t.cpu().data.item())
                    loss_t.backward()
                    optimizer_f.step()
                    optimizer_g.step()
                elif args.method == 'MME':
                    loss_t = adentropy(F1, output, args.lamda,
                                       class_dist_threshold_list)
                    loss_t.backward()
                    optimizer_f.step()
                    optimizer_g.step()
                else:
                    raise ValueError('Method cannot be recognized.')
                log_train = 'S {} T {} Train Ep: {} lr{} \t ' \
                            'Loss Classification: {:.6f} Reg: {:.6f} Loss T {:.6f} ' \
                            'Method {}\n'.format(args.source, args.target,
                                                step, lr, loss.data, reg_weight*reg_loss.data,
                                                -loss_t.data, args.method)
            else:
                log_train = 'S {} T {} Train Ep: {} lr{} \t ' \
                            'Loss Classification: {:.6f} Reg: {:.6f} Method {}\n'.\
                    format(args.source, args.target,
                        step, lr, loss.data, reg_weight * reg_loss.data,
                        args.method)
            G.zero_grad()
            F1.zero_grad()
            zero_grad_all()
            if step % args.log_interval == 0:
                print(log_train)
            if step % args.save_interval == 0 and step > 0:
                loss_val, acc_val = test(target_loader_val)
                loss_test, acc_test = test(target_loader_test)
                G.train()
                F1.train()
                if acc_val >= best_acc:
                    best_acc = acc_val
                    best_acc_test = acc_test
                    counter = 0
                else:
                    counter += 1
                if args.early:
                    if counter > args.patience:
                        break
                print('best acc test %f best acc val %f' %
                      (best_acc_test, acc_val))
                print('record %s' % record_file)
                with open(record_file, 'a') as f:
                    f.write('step %d best %f final %f \n' %
                            (step, best_acc_test, acc_val))
                G.train()
                F1.train()
                #saving model as a checkpoint dict having many things
                if args.save_check:
                    print('saving model')
                    is_best = counter == 0
                    save_mymodel(
                        args, {
                            'step': step,
                            'arch': args.net,
                            'G_state_dict': G.state_dict(),
                            'F1_state_dict': F1.state_dict(),
                            'best_acc_test': best_acc_test,
                            'optimizer_g': optimizer_g.state_dict(),
                            'optimizer_f': optimizer_f.state_dict(),
                        }, is_best, time_stamp)
Example #2
def train():
    G.train()
    F1.train()
    optimizer_g = optim.SGD(params, momentum=0.9,
                            weight_decay=0.0005, nesterov=True)
    optimizer_f = optim.SGD(list(F1.parameters()), lr=1.0, momentum=0.9,
                            weight_decay=0.0005, nesterov=True)

    def zero_grad_all():
        optimizer_g.zero_grad()
        optimizer_f.zero_grad()
    param_lr_g = []
    for param_group in optimizer_g.param_groups:
        param_lr_g.append(param_group["lr"])
    param_lr_f = []
    for param_group in optimizer_f.param_groups:
        param_lr_f.append(param_group["lr"])
    criterion = nn.CrossEntropyLoss().cuda()
    all_step = args.steps
    data_iter_s = iter(source_loader)
    data_iter_t = iter(target_loader)
    data_iter_t_unl = iter(target_loader_unl)
    len_train_source = len(source_loader)
    len_train_target = len(target_loader)
    len_train_target_semi = len(target_loader_unl)
    best_acc = 0
    counter = 0
    for step in range(all_step):
        optimizer_g = inv_lr_scheduler(param_lr_g, optimizer_g, step,
                                       init_lr=args.lr)
        optimizer_f = inv_lr_scheduler(param_lr_f, optimizer_f, step,
                                       init_lr=args.lr)
        lr = optimizer_f.param_groups[0]['lr']
        if step % len_train_target == 0:
            data_iter_t = iter(target_loader)
        if step % len_train_target_semi == 0:
            data_iter_t_unl = iter(target_loader_unl)
        if step % len_train_source == 0:
            data_iter_s = iter(source_loader)
        data_t = next(data_iter_t)
        data_t_unl = next(data_iter_t_unl)
        data_s = next(data_iter_s)
        im_data_s.data.resize_(data_s[0].size()).copy_(data_s[0])
        gt_labels_s.data.resize_(data_s[1].size()).copy_(data_s[1])
        im_data_t.data.resize_(data_t[0].size()).copy_(data_t[0])
        gt_labels_t.data.resize_(data_t[1].size()).copy_(data_t[1])
        im_data_tu.data.resize_(data_t_unl[0].size()).copy_(data_t_unl[0])
        zero_grad_all()
        data = torch.cat((im_data_s, im_data_t), 0)
        target = torch.cat((gt_labels_s, gt_labels_t), 0)
        output = G(data)
        out1 = F1(output)
        loss = criterion(out1, target)
        loss.backward(retain_graph=True)
        optimizer_g.step()
        optimizer_f.step()
        zero_grad_all()
        if not args.method == 'S+T':
            output = G(im_data_tu)
            if args.method == 'ENT':
                loss_t = entropy(F1, output, args.lamda)
                loss_t.backward()
                optimizer_f.step()
                optimizer_g.step()
            elif args.method == 'MME':
                loss_t = adentropy(F1, output, args.lamda)
                loss_t.backward()
                optimizer_f.step()
                optimizer_g.step()
            else:
                raise ValueError('Method cannot be recognized.')
            log_train = 'S {} T {} Train Ep: {} lr{} \t ' \
                        'Loss Classification: {:.6f} Loss T {:.6f} ' \
                        'Method {}\n'.format(args.source, args.target,
                                             step, lr, loss.data,
                                             -loss_t.data, args.method)
        else:
            log_train = 'S {} T {} Train Ep: {} lr{} \t ' \
                        'Loss Classification: {:.6f} Method {}\n'.\
                format(args.source, args.target,
                       step, lr, loss.data,
                       args.method)
        G.zero_grad()
        F1.zero_grad()
        zero_grad_all()
        if step % args.log_interval == 0:
            print(log_train)
        if step % args.save_interval == 0 and step > 0:
            loss_test, acc_test = test(target_loader_test)
            loss_val, acc_val = test(target_loader_val)
            G.train()
            F1.train()
            if acc_val >= best_acc:
                best_acc = acc_val
                best_acc_test = acc_test
                counter = 0
            else:
                counter += 1
            if args.early:
                if counter > args.patience:
                    break
            print('best acc test %f best acc val %f' % (best_acc_test,
                                                        acc_val))
            print('record %s' % record_file)
            with open(record_file, 'a') as f:
                f.write('step %d best %f final %f \n' % (step,
                                                         best_acc_test,
                                                         acc_val))
            G.train()
            F1.train()
            if args.save_check:
                print('saving model')
                torch.save(G.state_dict(),
                           os.path.join(args.checkpath,
                                        "G_iter_model_{}_{}_"
                                        "to_{}_step_{}.pth.tar".
                                        format(args.method, args.source,
                                               args.target, step)))
                torch.save(F1.state_dict(),
                           os.path.join(args.checkpath,
                                        "F1_iter_model_{}_{}_"
                                        "to_{}_step_{}.pth.tar".
                                        format(args.method, args.source,
                                               args.target, step)))
Example #3
def train():
    G.train()
    F1.train()
    optimizer_g = optim.SGD(params,
                            momentum=0.9,
                            lr=args.lr,
                            weight_decay=0.0005,
                            nesterov=True)
    optimizer_f = optim.SGD(list(F1.parameters()),
                            lr=args.lr,
                            momentum=0.9,
                            weight_decay=0.0005,
                            nesterov=True)

    def zero_grad_all():
        optimizer_g.zero_grad()
        optimizer_f.zero_grad()

    param_lr_g = []
    for param_group in optimizer_g.param_groups:
        param_lr_g.append(param_group["lr"])
    param_lr_f = []
    for param_group in optimizer_f.param_groups:
        param_lr_f.append(param_group["lr"])
    criterion = nn.CrossEntropyLoss().cuda()
    all_step = args.steps
    data_iter_s = iter(source_loader)
    data_iter_t = iter(target_loader)
    data_iter_t_unl = iter(target_loader_unl)
    len_train_source = len(source_loader)
    len_train_target = len(target_loader)
    len_train_target_semi = len(target_loader_unl)
    best_acc = 0
    counter = 0

    sch_g = optim.lr_scheduler.StepLR(optimizer_g, 100, 0.8)
    sch_f = optim.lr_scheduler.StepLR(optimizer_f, 100, 0.8)
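    # Step decay: multiply both learning rates by 0.8 every 100 scheduler steps
    # (this replaces the inverse-decay scheduler, which is commented out below)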

    # Tensorboard
    writer = SummaryWriter(log_dir=args.checkpath)

    for step in range(all_step):
        #optimizer_g = inv_lr_scheduler(param_lr_g, optimizer_g, step,
        #                               init_lr=args.lr)
        #optimizer_f = inv_lr_scheduler(param_lr_f, optimizer_f, step,
        #                               init_lr=args.lr)
        lr = optimizer_f.param_groups[0]['lr']
        # Tensorboard ; record lr
        writer.add_scalar("Others/lr", lr, step)

        if step % len_train_target == 0:
            data_iter_t = iter(target_loader)
        if step % len_train_target_semi == 0:
            data_iter_t_unl = iter(target_loader_unl)
        if step % len_train_source == 0:
            data_iter_s = iter(source_loader)
        data_t = next(data_iter_t)
        data_t_unl = next(data_iter_t_unl)
        data_s = next(data_iter_s)

        with torch.no_grad():
            im_data_s.resize_(data_s[0].size()).copy_(data_s[0])
            gt_labels_s.resize_(data_s[1].size()).copy_(data_s[1])
            im_data_t.resize_(data_t[0].size()).copy_(data_t[0])
            gt_labels_t.resize_(data_t[1].size()).copy_(data_t[1])
            im_data_tu.resize_(data_t_unl[0].size()).copy_(data_t_unl[0])

        zero_grad_all()
        data = torch.cat((im_data_s, im_data_t), 0)
        target = torch.cat((gt_labels_s, gt_labels_t), 0)
        output = G(data)
        out1 = F1(output)
        loss = criterion(out1, target)
        loss.backward(retain_graph=True)
        optimizer_g.step()
        optimizer_f.step()
        zero_grad_all()
        if not args.method == 'S+T':
            output = G(im_data_tu)
            if args.method == 'ENT':
                loss_t = entropy(F1, output, args.lamda)
                loss_t.backward()
                optimizer_f.step()
                optimizer_g.step()
            elif args.method == 'MME':
                loss_t = adentropy(F1, output, args.lamda)
                loss_t.backward()
                optimizer_f.step()
                optimizer_g.step()
            else:
                raise ValueError('Method cannot be recognized.')
            log_train = 'S: {}; T: {}; Train Ep: {}; lr={} \t ' \
                        'Loss Classification: {:.6f}; Loss T: {:.6f}; ' \
                        'Method: {}\n'.format(args.source, args.target,
                                             step, lr, loss.data,
                                             -loss_t.data, args.method)
            writer.add_scalar("Loss/Entropy_loss", -loss_t.data, step)
        else:
            log_train = 'S: {}; T: {}; Train Ep: {}; lr={} \t ' \
                        'Loss Classification: {:.6f}; Method: {}\n'.\
                format(args.source, args.target,
                       step, lr, loss.data,
                       args.method)
        # Tensorboard ; record classification loss
        writer.add_scalar("Loss/Classification_loss", loss.data, step)

        G.zero_grad()
        F1.zero_grad()
        zero_grad_all()
        if step % args.log_interval == 0:
            print(log_train)
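            # Note: the StepLR schedulers are advanced once per log interval here,
            # not once per training step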
            sch_g.step()
            sch_f.step()
        if step % args.save_interval == 0 and step > 0:
            print('---------------------------------')
            print('Strain:')
            loss_strain, acc_strain = test(source_loader)
            writer.add_scalar("Accuracy/Source_Testing_Acc", acc_strain, step)
            print('---------------------------------')
            print('Ttrain:')
            loss_unl, acc_unl = test(target_loader_unl)
            writer.add_scalar("Accuracy/Target_Training_Acc", acc_unl, step)
            print('---------------------------------')
            print('Ttest:')
            loss_test, acc_test = test(target_loader_test)
            writer.add_scalar("Accuracy/Target_Testing_Acc", acc_test, step)
            G.train()
            F1.train()
            if acc_unl >= best_acc:
                best_acc = acc_unl
                best_acc_test = acc_test
                counter = 0
            else:
                counter += 1
            if args.early:
                if counter > args.patience:
                    break
            print('best acc test %f best acc unl %f' %
                  (best_acc_test, acc_unl))
            print('record %s' % record_file)
            with open(record_file, 'a') as f:
                f.write('step %d best %f final %f \n' %
                        (step, best_acc_test, acc_unl))
            G.train()
            F1.train()
            if args.save_check:
                print('saving model')
                torch.save(
                    G.state_dict(),
                    os.path.join(
                        args.checkpath,
                        "G_{}_{}.pth".format(args.method, str(int(acc_unl)))))
                torch.save(
                    F1.state_dict(),
                    os.path.join(
                        args.checkpath,
                        "F1_{}_{}.pth".format(args.method, str(int(acc_unl)))))

    writer.close()
Example #4
    def train(self):
        self.G.train()
        self.F1.train()
        optimizer_g = optim.SGD(self.params,
                                momentum=0.9,
                                weight_decay=0.0005,
                                nesterov=True)
        optimizer_f = optim.SGD(list(self.F1.parameters()),
                                lr=1.0,
                                momentum=0.9,
                                weight_decay=0.0005,
                                nesterov=True)

        def zero_grad_all():
            optimizer_g.zero_grad()
            optimizer_f.zero_grad()

        param_lr_g = []
        for param_group in optimizer_g.param_groups:
            param_lr_g.append(param_group["lr"])
        param_lr_f = []
        for param_group in optimizer_f.param_groups:
            param_lr_f.append(param_group["lr"])

        criterion = nn.CrossEntropyLoss().cuda()
        all_step = self.args.steps
        data_iter_s = iter(self.source_loader)
        data_iter_t = iter(self.target_loader)
        data_iter_t_unl = iter(self.target_loader_unl)
        len_train_source = len(self.source_loader)
        len_train_target = len(self.target_loader)
        len_train_target_semi = len(self.target_loader_unl)
        for step in range(all_step):
            optimizer_g = inv_lr_scheduler(param_lr_g,
                                           optimizer_g,
                                           step,
                                           init_lr=self.args.lr)
            optimizer_f = inv_lr_scheduler(param_lr_f,
                                           optimizer_f,
                                           step,
                                           init_lr=self.args.lr)

            lr = optimizer_f.param_groups[0]['lr']
            if step % len_train_target == 0:
                data_iter_t = iter(self.target_loader)
            if step % len_train_target_semi == 0:
                data_iter_t_unl = iter(self.target_loader_unl)
            if step % len_train_source == 0:
                data_iter_s = iter(self.source_loader)
            data_t = next(data_iter_t)
            data_t_unl = next(data_iter_t_unl)
            data_s = next(data_iter_s)
            self.im_data_s.resize_(data_s[0].size()).copy_(data_s[0])
            self.gt_labels_s.resize_(data_s[1].size()).copy_(data_s[1])
            self.im_data_t.resize_(data_t[0].size()).copy_(data_t[0])
            self.gt_labels_t.resize_(data_t[1].size()).copy_(data_t[1])
            self.im_data_tu.resize_(data_t_unl[0].size()).copy_(data_t_unl[0])
            zero_grad_all()
            data = torch.cat((self.im_data_s, self.im_data_t), 0)
            target = torch.cat((self.gt_labels_s, self.gt_labels_t), 0)
            output = self.G(data)
            out1 = self.F1(output)
            loss = criterion(out1, target)
            loss.backward(retain_graph=True)
            optimizer_g.step()
            optimizer_f.step()
            zero_grad_all()

            output = self.G(self.im_data_tu)
            loss_t = adentropy(self.F1, output, self.args.lamda)
            loss_t.backward()
            optimizer_f.step()
            optimizer_g.step()

            log_train = 'S {} T {} Train Ep: {} lr{} \t Loss Classification: {:.6f} Method {}\n'.format(
                self.args.source, self.args.target, step, lr, loss.data,
                self.args.method)
            self.G.zero_grad()
            self.F1.zero_grad()

            if step % self.args.log_interval == 0:
                print(log_train)
            if step % self.args.save_interval == 0 and step > 0:
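                # Periodic evaluation on the unlabeled target split;
                # this variant does not track a best-accuracy checkpoint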
                self.test(self.target_loader_unl)
                self.G.train()
                self.F1.train()
                if self.args.save_check:
                    print('saving model')
                    torch.save(
                        self.G.state_dict(),
                        os.path.join(
                            self.args.checkpath,
                            "G_iter_model_{}_{}_to_{}_step_{}.pth.tar".format(
                                self.args.method, self.args.source,
                                self.args.target, step)))
                    torch.save(
                        self.F1.state_dict(),
                        os.path.join(
                            self.args.checkpath,
                            "F1_iter_model_{}_{}_to_{}_step_{}.pth.tar".format(
                                self.args.method, self.args.source,
                                self.args.target, step)))
Example #5
    def train():
        G.train()
        F1.train()
        optimizer_g = optim.SGD(params,
                                momentum=0.9,
                                weight_decay=0.0005,
                                nesterov=True)
        optimizer_f = optim.SGD(list(F1.parameters()),
                                lr=1.0,
                                momentum=0.9,
                                weight_decay=0.0005,
                                nesterov=True)

        # Load the states of the two optimizers from the checkpoint
        optimizer_g.load_state_dict(main_dict['optimizer_g'])
        optimizer_f.load_state_dict(main_dict['optimizer_f'])
        print("Loaded optimizer states")

        def zero_grad_all():
            optimizer_g.zero_grad()
            optimizer_f.zero_grad()

        param_lr_g = []
        for param_group in optimizer_g.param_groups:
            param_lr_g.append(param_group["lr"])
        param_lr_f = []
        for param_group in optimizer_f.param_groups:
            param_lr_f.append(param_group["lr"])

        # Select the classification loss
        if args.loss == 'CE':
            criterion = nn.CrossEntropyLoss().to(device)
        elif args.loss == 'FL':
            criterion = FocalLoss(alpha=1, gamma=1).to(device)
        elif args.loss == 'CBFL':
            # Class-balanced focal loss: weight each class by the inverse of its
            # "effective number" of samples, computed from class_num_list
            beta = 0.99
            effective_num = 1.0 - np.power(beta, class_num_list)
            per_cls_weights = (1.0 - beta) / np.array(effective_num)
            per_cls_weights = per_cls_weights / np.sum(per_cls_weights) * len(
                class_num_list)
            per_cls_weights = torch.FloatTensor(per_cls_weights).to(device)
            criterion = CBFocalLoss(weight=per_cls_weights,
                                    gamma=0.5).to(device)
        else:
            raise ValueError('Unrecognized loss: {}'.format(args.loss))

        all_step = args.steps
        data_iter_s = iter(source_loader)
        data_iter_t = iter(target_loader)
        data_iter_t_unl = iter(target_loader_unl)
        len_train_source = len(source_loader)
        len_train_target = len(target_loader)
        len_train_target_semi = len(target_loader_unl)
        best_acc = 0
        counter = 0
        for step in range(all_step):
            optimizer_g = inv_lr_scheduler(param_lr_g,
                                           optimizer_g,
                                           step,
                                           init_lr=args.lr)
            optimizer_f = inv_lr_scheduler(param_lr_f,
                                           optimizer_f,
                                           step,
                                           init_lr=args.lr)
            lr = optimizer_f.param_groups[0]['lr']
            # condition for restarting the iteration for each of the data loaders
            if step % len_train_target == 0:
                data_iter_t = iter(target_loader)
            if step % len_train_target_semi == 0:
                data_iter_t_unl = iter(target_loader_unl)
            if step % len_train_source == 0:
                data_iter_s = iter(source_loader)
            data_t = next(data_iter_t)
            data_t_unl = next(data_iter_t_unl)
            data_s = next(data_iter_s)
            with torch.no_grad():
                im_data_s.resize_(data_s[0].size()).copy_(data_s[0])
                gt_labels_s.resize_(data_s[1].size()).copy_(data_s[1])
                im_data_t.resize_(data_t[0].size()).copy_(data_t[0])
                gt_labels_t.resize_(data_t[1].size()).copy_(data_t[1])
                im_data_tu.resize_(data_t_unl[0].size()).copy_(data_t_unl[0])

            zero_grad_all()
            data = torch.cat((im_data_s, im_data_t), 0)
            target = torch.cat((gt_labels_s, gt_labels_t), 0)
            output = G(data)
            out1 = F1(output)
            loss = criterion(out1, target)
            loss.backward(retain_graph=True)
            optimizer_g.step()
            optimizer_f.step()
            zero_grad_all()
            # Look up the per-image weights for the unlabeled image paths in this batch
            img_paths = list(data_t_unl[2])
            df1 = df.loc[df['img'].isin(img_paths)]
            df1 = df1['weight']
            weight_list = list(df1)

            if not args.method == 'S+T':
                output = G(im_data_tu)
                if args.method == 'ENT':
                    loss_t = entropy(F1, output, args.lamda)
                    loss_t.backward()
                    optimizer_f.step()
                    optimizer_g.step()
                elif args.method == 'MME':
                    loss_t = adentropy(F1, output, args.lamda, weight_list)
                    loss_t.backward()
                    optimizer_f.step()
                    optimizer_g.step()
                else:
                    raise ValueError('Method cannot be recognized.')
                log_train = 'S {} T {} Train Ep: {} lr{} \t ' \
                            'Loss Classification: {:.6f} Loss T {:.6f} ' \
                            'Method {}\n'.format(args.source, args.target,
                                                step, lr, loss.data,
                                                -loss_t.data, args.method)
            else:
                log_train = 'S {} T {} Train Ep: {} lr{} \t ' \
                            'Loss Classification: {:.6f} Method {}\n'.\
                    format(args.source, args.target,
                        step, lr, loss.data,
                        args.method)
            G.zero_grad()
            F1.zero_grad()
            zero_grad_all()
            if step % args.log_interval == 0:
                print(log_train)
            if step % args.save_interval == 0 and step > 0:
                loss_val, acc_val = test(target_loader_val)
                loss_test, acc_test = test(target_loader_test)
                G.train()
                F1.train()
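                # Note: unlike Example #1, this variant selects the best checkpoint
                # by test accuracy rather than validation accuracy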
                if acc_test >= best_acc:
                    best_acc = acc_test
                    best_acc_test = acc_test
                    counter = 0
                else:
                    counter += 1
                if args.early:
                    if counter > args.patience:
                        break
                print('best acc test %f best acc val %f' %
                      (best_acc_test, acc_val))
                print('record %s' % record_file)
                with open(record_file, 'a') as f:
                    f.write('step %d best %f final %f \n' %
                            (step, best_acc_test, acc_val))
                G.train()
                F1.train()
                #saving model as a checkpoint dict having many things
                if args.save_check:
                    print('saving model')
                    is_best = counter == 0
                    save_mymodel(
                        args, {
                            'step': step,
                            'arch': args.net,
                            'G_state_dict': G.state_dict(),
                            'F1_state_dict': F1.state_dict(),
                            'best_acc_test': best_acc_test,
                            'optimizer_g': optimizer_g.state_dict(),
                            'optimizer_f': optimizer_f.state_dict(),
                        }, is_best)
Example #6
def train():
    G.train()
    F1.train()
    optimizer_g = optim.SGD(params,
                            momentum=0.9,
                            weight_decay=0.0005,
                            nesterov=True)
    optimizer_f = optim.SGD(list(F1.parameters()),
                            lr=1.0,
                            momentum=0.9,
                            weight_decay=0.0005,
                            nesterov=True)

    def zero_grad_all():
        optimizer_g.zero_grad()
        optimizer_f.zero_grad()

    param_lr_g = []
    for param_group in optimizer_g.param_groups:
        param_lr_g.append(param_group["lr"])
    param_lr_f = []
    for param_group in optimizer_f.param_groups:
        param_lr_f.append(param_group["lr"])

    #criterion = nn.CrossEntropyLoss().cuda()
    beta = 0.99
    effective_num = 1.0 - np.power(beta, class_num_list)
    per_cls_weights = (1.0 - beta) / np.array(effective_num)
    per_cls_weights = per_cls_weights / np.sum(per_cls_weights) * len(
        class_num_list)
    per_cls_weights = torch.FloatTensor(per_cls_weights).cuda()
    criterion = FocalLoss(weight=per_cls_weights, gamma=0.5).cuda()
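    # The plain cross-entropy criterion (commented out above) is replaced by a focal
    # loss with class-balanced (effective-number) per-class weights; beta and gamma
    # are hard-coded here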
    all_step = args.steps
    data_iter_s = iter(source_loader)
    data_iter_t = iter(target_loader)
    data_iter_t_unl = iter(target_loader_unl)
    len_train_source = len(source_loader)
    len_train_target = len(target_loader)
    len_train_target_semi = len(target_loader_unl)
    best_acc_test = 0
    counter = 0
    for step in range(all_step):
        optimizer_g = inv_lr_scheduler(param_lr_g,
                                       optimizer_g,
                                       step,
                                       init_lr=args.lr)
        optimizer_f = inv_lr_scheduler(param_lr_f,
                                       optimizer_f,
                                       step,
                                       init_lr=args.lr)
        lr = optimizer_f.param_groups[0]['lr']
        if step % len_train_target == 0:
            data_iter_t = iter(target_loader)
        if step % len_train_target_semi == 0:
            data_iter_t_unl = iter(target_loader_unl)
        if step % len_train_source == 0:
            data_iter_s = iter(source_loader)
        data_t = next(data_iter_t)
        data_t_unl = next(data_iter_t_unl)
        data_s = next(data_iter_s)
        im_data_s.data.resize_(data_s[0].size()).copy_(data_s[0])
        gt_labels_s.data.resize_(data_s[1].size()).copy_(data_s[1])
        im_data_t.data.resize_(data_t[0].size()).copy_(data_t[0])
        gt_labels_t.data.resize_(data_t[1].size()).copy_(data_t[1])
        im_data_tu.data.resize_(data_t_unl[0].size()).copy_(data_t_unl[0])
        zero_grad_all()
        data = torch.cat((im_data_s, im_data_t), 0)
        target = torch.cat((gt_labels_s, gt_labels_t), 0)
        output = G(data)
        out1 = F1(output)
        loss = criterion(out1, target)
        loss.backward(retain_graph=True)
        optimizer_g.step()
        optimizer_f.step()
        zero_grad_all()
        if not args.method == 'S+T':
            output = G(im_data_tu)
            if args.method == 'ENT':
                loss_t = entropy(F1, output, args.lamda)
                loss_t.backward()
                optimizer_f.step()
                optimizer_g.step()
            elif args.method == 'MME':
                loss_t = adentropy(F1, output, args.lamda)
                loss_t.backward()
                optimizer_f.step()
                optimizer_g.step()
            else:
                raise ValueError('Method cannot be recognized.')
            log_train = 'S {} T {} Train Ep: {} lr{} \t ' \
                        'Loss Classification: {:.6f} Loss T {:.6f} ' \
                        'Method {}\n'.format(args.source, args.target,
                                             step, lr, loss.data,
                                             -loss_t.data, args.method)
        else:
            log_train = 'S {} T {} Train Ep: {} lr{} \t ' \
                        'Loss Classification: {:.6f} Method {}\n'.\
                format(args.source, args.target,
                       step, lr, loss.data,
                       args.method)
        G.zero_grad()
        F1.zero_grad()
        zero_grad_all()
        if step % args.log_interval == 0:
            print(log_train)
        if step % args.save_interval == 0 and step > 0:
            loss_test, acc_test = test(target_loader_test)
            loss_val, acc_val = test(target_loader_val)
            G.train()
            F1.train()
            if acc_test > best_acc_test:
                best_acc = acc_val
                best_acc_test = acc_test
                counter = 0
            else:
                counter += 1
            if args.early:
                if counter > args.patience:
                    break
            print('best acc test %f best acc val %f' %
                  (best_acc_test, acc_val))
            print('record %s' % record_file)
            with open(record_file, 'a') as f:
                f.write('step %d best %f final %f \n' %
                        (step, best_acc_test, acc_val))
            G.train()
            F1.train()
            if args.save_check:
                print('saving model...')
                is_best = counter == 0
                save_mymodel(
                    args, {
                        'step': step,
                        'arch': args.net,
                        'G_state_dict': G.state_dict(),
                        'F1_state_dict': F1.state_dict(),
                        'best_acc_test': best_acc_test,
                        'optimizer_g': optimizer_g.state_dict(),
                        'optimizer_f': optimizer_f.state_dict(),
                    }, is_best)
Example #7
def train():
    G.train()
    F1.train()
    F2.train()
    optimizer_g = optim.SGD(params,
                            momentum=0.9,
                            weight_decay=0.0005,
                            nesterov=True)
    optimizer_f1 = optim.SGD(list(F1.parameters()),
                             lr=1.0,
                             momentum=0.9,
                             weight_decay=0.0005,
                             nesterov=True)
    optimizer_f2 = optim.SGD(list(F2.parameters()),
                             lr=1.0,
                             momentum=0.9,
                             weight_decay=0.0005,
                             nesterov=True)

    def zero_grad_all():
        optimizer_g.zero_grad()
        optimizer_f1.zero_grad()
        optimizer_f2.zero_grad()

    param_lr_g = []
    for param_group in optimizer_g.param_groups:
        param_lr_g.append(param_group["lr"])
    param_lr_f1 = []
    for param_group in optimizer_f1.param_groups:
        param_lr_f1.append(param_group["lr"])
    param_lr_f2 = []
    for param_group in optimizer_f2.param_groups:
        param_lr_f2.append(param_group["lr"])
    criterion = nn.CrossEntropyLoss().cuda()
    all_step = args.steps
    data_iter_s = iter(source_loader)
    data_iter_t = iter(target_loader)
    data_iter_t_unl = iter(target_loader_unl)
    len_train_source = len(source_loader)
    len_train_target = len(target_loader)
    len_train_target_semi = len(target_loader_unl)
    best_acc = 0
    counter = 0
    for step in range(all_step):
        # optimizer_g = inv_lr_scheduler(param_lr_g, optimizer_g, step,
        #                                init_lr=args.lr)
        # optimizer_f1 = inv_lr_scheduler(param_lr_f1, optimizer_f1, step,
        #                                init_lr=args.lr)
        # optimizer_f2 = inv_lr_scheduler(param_lr_f2, optimizer_f2, step,
        #                                 init_lr=args.lr)
        lr = optimizer_f1.param_groups[0]['lr']
        if step % len_train_target == 0:
            data_iter_t = iter(target_loader)
        if step % len_train_target_semi == 0:
            data_iter_t_unl = iter(target_loader_unl)
        if step % len_train_source == 0:
            data_iter_s = iter(source_loader)
        data_t = next(data_iter_t)
        data_t_unl = next(data_iter_t_unl)
        data_s = next(data_iter_s)
        im_data_s.resize_(data_s[0].size()).copy_(data_s[0])
        gt_labels_s.resize_(data_s[1].size()).copy_(data_s[1])
        im_data_t.resize_(data_t[0].size()).copy_(data_t[0])
        gt_labels_t.resize_(data_t[1].size()).copy_(data_t[1])
        im_data_tu.resize_(data_t_unl[0].size()).copy_(data_t_unl[0])
        zero_grad_all()
        data = torch.cat((im_data_s, im_data_t), 0)
        # target = torch.cat((gt_labels_s, gt_labels_t), 0)
        output = G(data)
        output_s = output[:len(im_data_s)]
        output_t = output[len(im_data_s):]
        # output_tu = output[len(im_data_s)+len(im_data_t):]
        out_1t = F1(output_t)
        out_1s = F1(output_s)
        out_2t = F2(output_t)
        out_2s = F2(output_s)

        # out_1tu = F1(output_tu)
        # out_2tu = F2(output_tu)
        #
        # pseudo_label_1 = torch.softmax(out_1tu.detach_(), dim=-1)
        # pseudo_label_2 = torch.softmax(out_2tu.detach_(), dim=-1)
        # max_probs_1, targets_u_1 = torch.max(pseudo_label_1, dim=-1)
        # max_probs_2, targets_u_2 = torch.max(pseudo_label_2, dim=-1)
        # mask = (targets_u_1 == targets_u_2).float()

        ## Source-based Classifier loss: L1
        loss_1t = criterion(
            out_1t, gt_labels_t
        )  #+ (F.cross_entropy(out_1tu, targets_u_1, reduction='none') * mask).mean()

        # mask = torch.cat((torch.ones_like(gt_labels_t).float(), mask), 0)
        # loss_1t = (F.cross_entropy(torch.cat((out_1t, out_1tu), 0),
        #                     torch.cat((gt_labels_t, targets_u_1), 0), reduction='none') * mask).mean()
        loss_1s = criterion(out_1s, gt_labels_s)

        # entropy_s = adentropy(F1, output_s, args.beta)
        # entropy_tu = adentropy(F2, output_tu, args.lamda)

        loss_1 = args.alpha * loss_1s + (1 - args.alpha) * loss_1t

        ## Target-based Classifier loss
        loss_2t = criterion(
            out_2t, gt_labels_t
        )  #+ (F.cross_entropy(out_2tu, targets_u_2, reduction='none') * mask).mean()
        # loss_2t = (F.cross_entropy(torch.cat((out_2t, out_2tu), 0),
        #                  torch.cat((gt_labels_t, targets_u_2), 0), reduction='none') * mask).mean()
        loss_2s = criterion(out_2s, gt_labels_s)

        loss_2 = args.alpha * loss_2t + (1 - args.alpha) * loss_2s

        loss_1.backward(retain_graph=True)
        loss_2.backward(retain_graph=True)
        optimizer_g.step()
        optimizer_f1.step()
        optimizer_f2.step()
        zero_grad_all()

        output = G(torch.cat((im_data_s, im_data_tu), 0))
        output_s = output[:len(im_data_s)]
        output_tu = output[len(im_data_s):]
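        # Adversarial entropy terms for the two classifier heads; note the opposite
        # signs for the source (F1) and unlabeled-target (F2) terms. As in MME,
        # adentropy is assumed to apply a gradient reversal internally.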
        entropy_s = adentropy(F1, output_s, args.beta)
        entropy_tu = -adentropy(F2, output_tu, args.lamda)

        entropy = entropy_s + entropy_tu
        entropy.backward(retain_graph=True)
        optimizer_f1.step()
        # entropy_tu.backward(retain_graph=True)
        optimizer_f2.step()
        optimizer_g.step()

        log_train = 'S {} T {} Train Ep: {} lr{} \t ' \
                    'Loss_1 Classification: {:.6f} Loss_2 Classification: {:.6f} ' \
                    'Entropy_S {:.6f} Entropy_TU {:.6f}\n'.format(args.source, args.target,
                                         step, lr, loss_1.data, loss_2.data,
                                         entropy_s.data, entropy_tu.data)
        # G.zero_grad()
        # F1.zero_grad()
        # F2.zero_grad()
        # zero_grad_all()
        if step % args.log_interval == 0:
            print(log_train)
        if step % args.save_interval == 0 and step > 0:
            loss_test, acc_test = test(target_loader_test)
            loss_val, acc_val = test(target_loader_val)
            G.train()
            F1.train()
            F2.train()
            if acc_val >= best_acc:
                best_acc = acc_val
                best_acc_test = acc_test
                counter = 0
            else:
                counter += 1
            if args.early:
                if counter > args.patience:
                    break
            print('best acc test %f best acc val %f' %
                  (best_acc_test, acc_val))
            print('record %s' % record_file)
            with open(record_file, 'a') as f:
                f.write('step %d best %f final %f \n' %
                        (step, best_acc_test, acc_val))
            G.train()
            F1.train()
            F2.train()
            if args.save_check:
                print('saving model')
                torch.save(
                    G.state_dict(),
                    os.path.join(
                        args.checkpath, "G_iter_model_{}_{}_"
                        "to_{}_step_{}.pth.tar".format(args.method,
                                                       args.source,
                                                       args.target, step)))
                torch.save(
                    F2.state_dict(),
                    os.path.join(
                        args.checkpath, "F2_iter_model_{}_{}_"
                        "to_{}_step_{}.pth.tar".format(args.method,
                                                       args.source,
                                                       args.target, step)))