Beispiel #1
0
def cal_acc(loader, netF, netB, netC, flag=False):
    """Evaluate the ``netC(netB(netF(x)))`` pipeline on every batch of ``loader``.

    Args:
        loader: iterable of batches; item 0 of a batch is the input tensor and
            item 1 the label tensor.
        netF, netB, netC: callables chained as ``netC(netB(netF(inputs)))``.
        flag: when true, return confusion-matrix based results instead of the
            overall accuracy and mean entropy.

    Returns:
        ``(accuracy * 100, mean_ent)`` when ``flag`` is false, where
        ``mean_ent`` is the mean of ``loss.Entropy`` over the softmax outputs.
        ``(aacc, acc)`` when ``flag`` is true: the mean of the per-row
        accuracies in percent, and those accuracies rounded to two decimals
        and joined by spaces into one string.
    """
    output_chunks = []
    label_chunks = []
    with torch.no_grad():
        # Iterate the loader directly: ``iterator.next()`` is a Python 2 idiom
        # that Python 3 iterators are not required to provide.
        for data in loader:
            inputs = data[0].cuda()
            labels = data[1]
            outputs = netC(netB(netF(inputs)))
            output_chunks.append(outputs.float().cpu())
            label_chunks.append(labels.float())
    # One concatenation instead of re-copying the accumulated tensor per batch.
    all_output = torch.cat(output_chunks, 0)
    all_label = torch.cat(label_chunks, 0)

    _, predict = torch.max(all_output, 1)
    accuracy = torch.sum(torch.squeeze(predict).float() == all_label).item() / float(all_label.size()[0])
    mean_ent = torch.mean(loss.Entropy(nn.Softmax(dim=1)(all_output))).cpu().data.item()

    if flag:
        matrix = confusion_matrix(all_label, torch.squeeze(predict).float())
        # Keep the rows indexed by the distinct label values that occur.
        matrix = matrix[np.unique(all_label).astype(int), :]
        acc = matrix.diagonal() / matrix.sum(axis=1) * 100
        aacc = acc.mean()
        acc = ' '.join(str(np.round(value, 2)) for value in acc)
        return aacc, acc
    return accuracy * 100, mean_ent
Beispiel #2
0
def cal_acc(loader, netF, netB, netC, flag=False):
    """Evaluate the ``netC(netB(netF(x)))`` pipeline on every batch of ``loader``.

    Args:
        loader: iterable of batches; item 0 of a batch is the input tensor and
            item 1 the label tensor.
        netF, netB, netC: callables chained as ``netC(netB(netF(inputs)))``.
        flag: when true, return confusion-matrix based results instead of the
            overall accuracy and mean entropy.

    Returns:
        ``(accuracy, mean_ent)`` when ``flag`` is false; ``accuracy`` is a
        fraction and ``mean_ent`` the mean of ``loss.Entropy`` over the
        softmax outputs.
        ``(mean_acc, acc)`` when ``flag`` is true: the confusion-matrix
        diagonal divided by the row sums, and its mean.
    """
    output_chunks = []
    label_chunks = []
    with torch.no_grad():
        # Iterate the loader directly: ``iterator.next()`` is a Python 2 idiom
        # that Python 3 iterators are not required to provide.
        for data in loader:
            inputs = data[0].cuda()
            labels = data[1]
            outputs = netC(netB(netF(inputs)))
            output_chunks.append(outputs.float().cpu())
            label_chunks.append(labels.float())
    # One concatenation instead of re-copying the accumulated tensor per batch.
    all_output = torch.cat(output_chunks, 0)
    all_label = torch.cat(label_chunks, 0)

    _, predict = torch.max(all_output, 1)
    accuracy = torch.sum(
        torch.squeeze(predict).float() == all_label).item() / float(
            all_label.size()[0])
    mean_ent = torch.mean(loss.Entropy(
        nn.Softmax(dim=1)(all_output))).cpu().data.item()

    if flag:
        # The interactive debugger breakpoint that used to sit here was
        # removed so evaluation no longer blocks waiting for console input.
        matrix = confusion_matrix(all_label, torch.squeeze(predict).float())
        acc = matrix.diagonal() / matrix.sum(axis=1)
        return np.mean(acc), acc
    return accuracy, mean_ent
Beispiel #3
0
def cal_acc(loader, netF, netB, netC):
    """Evaluate the ``netC(netB(netF(x)))`` pipeline on every batch of ``loader``.

    Args:
        loader: iterable of batches; item 0 of a batch is the input tensor and
            item 1 the label tensor.
        netF, netB, netC: callables chained as ``netC(netB(netF(inputs)))``.

    Returns:
        ``(accuracy * 100, mean_ent)`` where ``mean_ent`` is the mean of
        ``loss.Entropy`` over the softmax of the collected outputs.
    """
    output_chunks = []
    label_chunks = []
    with torch.no_grad():
        # Iterate the loader directly: ``iterator.next()`` is a Python 2 idiom
        # that Python 3 iterators are not required to provide.
        for data in loader:
            inputs = data[0].cuda()
            labels = data[1]
            outputs = netC(netB(netF(inputs)))
            output_chunks.append(outputs.float().cpu())
            label_chunks.append(labels.float())
    # One concatenation instead of re-copying the accumulated tensor per batch.
    all_label = torch.cat(label_chunks, 0)

    # Predictions are taken from the softmax probabilities.
    all_output = nn.Softmax(dim=1)(torch.cat(output_chunks, 0))
    _, predict = torch.max(all_output, 1)
    accuracy = torch.sum(
        torch.squeeze(predict).float() == all_label).item() / float(
            all_label.size()[0])
    mean_ent = torch.mean(loss.Entropy(all_output)).cpu().data.item()

    return accuracy * 100, mean_ent
def cal_acc(loader, net, flag=False):
    """Evaluate ``net`` on every batch of ``loader``.

    Args:
        loader: iterable of batches; item 0 of a batch is the input tensor and
            item 1 the label tensor.
        net: callable returning a pair whose second element holds the class
            scores used for prediction.
        flag: accepted for signature compatibility; not used.

    Returns:
        ``(accuracy, mean_ent)``: the fraction of correct predictions and the
        mean of ``loss.Entropy`` over the softmax of the class scores.
    """
    output_chunks = []
    label_chunks = []
    with torch.no_grad():
        # Iterate the loader directly: ``iterator.next()`` is a Python 2 idiom
        # that Python 3 iterators are not required to provide.
        for data in loader:
            inputs = data[0].cuda()
            labels = data[1]
            _, outputs = net(inputs)
            output_chunks.append(outputs.float().cpu())
            label_chunks.append(labels.float())
    # One concatenation instead of re-copying the accumulated tensor per batch.
    all_output = torch.cat(output_chunks, 0)
    all_label = torch.cat(label_chunks, 0)

    _, predict = torch.max(all_output, 1)
    accuracy = torch.sum(
        torch.squeeze(predict).float() == all_label).item() / float(
            all_label.size()[0])
    # Softmax is applied exactly once before the entropy; applying it to the
    # already-normalised probabilities again would distort the value.
    probabilities = nn.Softmax(dim=1)(all_output)
    mean_ent = torch.mean(loss.Entropy(probabilities)).cpu().data.item()

    return accuracy, mean_ent
Beispiel #5
0
def get_ent(oh_final_f):
    """Compute ``loss.Entropy`` of the softmax of ``oh_final_f``.

    Args:
        oh_final_f: NumPy array, converted with ``torch.from_numpy``; the
            softmax is taken over dimension 1.

    Returns:
        ``(mean_ent, out_ent_arr)``: the mean of the entropy values as a
        tensor, and the entropy values themselves as a NumPy array.
    """
    probabilities = nn.Softmax(dim=1)(torch.from_numpy(oh_final_f))
    entropies = loss.Entropy(probabilities)
    return torch.mean(entropies), entropies.cpu().numpy()
Beispiel #6
0
def train(args, config, model, ad_net, random_layer, train_loader,
          train_loader1, optimizer, optimizer_ad, epoch):
    """Train ``model`` for one epoch on paired source/target batches.

    Args:
        args: namespace read for ``mdd_weight``, ``left_weight``,
            ``right_weight``, ``weight``, ``log_interval`` and ``batch_size``.
        config: mapping whose ``"out_file"`` entry is a writable log handle.
        model: callable returning ``(feature, output)`` for a batch.
        ad_net, random_layer: forwarded to ``loss_func.CDAN``.
        train_loader: loader yielding ``(data, label)`` source batches.
        train_loader1: loader yielding ``(data, label)`` target batches; the
            target labels are ignored.
        optimizer: optimizer stepped on every iteration.
        optimizer_ad: optimizer stepped only when ``epoch > 0``.
        epoch: current epoch index; the CDAN term is only added for
            ``epoch > 0``.
    """
    model.train()
    len_source = len(train_loader)
    len_target = len(train_loader1)
    # An epoch spans the longer loader; the shorter one is restarted whenever
    # it has been consumed.
    num_iter = max(len_source, len_target)
    total_loss = 0
    for batch_idx in range(num_iter):
        if batch_idx % len_source == 0:
            iter_source = iter(train_loader)
        if batch_idx % len_target == 0:
            iter_target = iter(train_loader1)
        # Built-in ``next()`` replaces the Python 2 style ``iterator.next()``.
        data_source, label_source = next(iter_source)
        data_source, label_source = data_source.cuda(), label_source.cuda()
        data_target, _ = next(iter_target)
        data_target = data_target.cuda()
        optimizer.zero_grad()
        optimizer_ad.zero_grad()

        feature_source, output_source = model(data_source)
        feature_target, output_target = model(data_target)

        feature = torch.cat((feature_source, feature_target), 0)
        output = torch.cat((output_source, output_target), 0)

        # Target samples get the model's own argmax predictions as labels.
        labels_target_fake = torch.max(nn.Softmax(dim=1)(output_target), 1)[1]
        labels = torch.cat((label_source, labels_target_fake))

        # Supervised loss on the source part of the concatenated outputs only.
        loss = nn.CrossEntropyLoss()(output.narrow(0, 0, data_source.size(0)),
                                     label_source)

        softmax_output = nn.Softmax(dim=1)(output)
        if epoch > 0:
            entropy = loss_func.Entropy(softmax_output)
            loss += loss_func.CDAN([feature, softmax_output], ad_net, entropy,
                                   network.calc_coeff(num_iter * (epoch - 0) +
                                                      batch_idx), random_layer)

        mdd_loss = args.mdd_weight * loss_func.mdd_digit(
            feature, labels, args.left_weight, args.right_weight, args.weight)
        loss = loss + mdd_loss

        total_loss += loss.data

        loss.backward()
        optimizer.step()
        if epoch > 0:
            optimizer_ad.step()
        if (batch_idx + epoch * num_iter) % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * args.batch_size, num_iter * args.batch_size,
                100. * batch_idx / num_iter, loss.item()))
    log_str = "total_loss:{}\n".format(total_loss)
    config["out_file"].write(log_str)
    config["out_file"].flush()
    print(log_str)
Beispiel #7
0
def cal_acc(loader, netF=None, netB=None, netC=None, per_class_flag=False, visda_flag=False):
    """Calculate model accuracy on a validation or test loader.

    :param loader: dataloader; item 0 of a batch is the input tensor and
        item 1 the label tensor
    :param netF: feature extractor network
    :param netB: bottleneck network
    :param netC: classifier network
    :param per_class_flag: if True, calculate the per-class average accuracy
    :param visda_flag: if True, return the accuracy of each class as a string
        (the per-class statistics are then computed regardless of
        ``per_class_flag``)
    :return: ``(overall acc, str of per-class acc, per-class avg acc)`` if
        ``visda_flag``; ``(overall acc, per-class avg acc, mean entropy)`` if
        only ``per_class_flag``; otherwise ``(overall acc, mean entropy)``.
        Accuracies are in percent.
    """
    output_chunks = []
    label_chunks = []
    with torch.no_grad():
        # Iterate the loader directly: ``iterator.next()`` is a Python 2 idiom
        # that Python 3 iterators are not required to provide.
        for data in loader:
            inputs = data[0].cuda()
            labels = data[1]
            outputs = netC(netB(netF(inputs)))
            output_chunks.append(outputs.float().cpu())
            label_chunks.append(labels.float())
    # One concatenation instead of re-copying the accumulated tensor per batch.
    all_label = torch.cat(label_chunks, 0)

    all_output = nn.Softmax(dim=1)(torch.cat(output_chunks, 0))
    _, predict = torch.max(all_output, 1)
    accuracy = torch.sum(torch.squeeze(predict).float() == all_label).item() / float(all_label.size()[0])
    accuracy *= 100  # overall accuracy
    mean_ent = torch.mean(loss.Entropy(all_output)).cpu().data.item()  # average entropy of classification results

    # Both flagged return paths need the per-class statistics; computing them
    # for ``visda_flag`` too avoids an UnboundLocalError when that flag is set
    # without ``per_class_flag``.
    if per_class_flag or visda_flag:
        matrix = confusion_matrix(all_label, torch.squeeze(predict).float())
        per_cls_acc_vec = matrix.diagonal() / matrix.sum(axis=1) * 100
        per_cls_avg_acc = per_cls_acc_vec.mean()  # per-class avg acc
        # str: acc of each class, rounded to two decimals
        acc_each_cls = ' '.join(str(np.round(value, 2)) for value in per_cls_acc_vec)

    if visda_flag:
        # For VisDA, return acc of each cls to be printed
        # overall acc, acc of each cls: str, per-class avg acc
        return accuracy, acc_each_cls, per_cls_avg_acc

    if per_class_flag:
        # For Office-Home and DomainNet, no need to return acc of each class
        # overall acc, per-class avg acc, average entropy
        return accuracy, per_cls_avg_acc, mean_ent

    # overall acc, mean-ent
    return accuracy, mean_ent
Beispiel #8
0
def train_target(args):
    """Adapt a ``network.Res50`` model on the target loader using pseudo-labels.

    Every epoch the labels returned by ``obtain_label`` on the ``'test'``
    loader are used as targets for a ``loss.CrossEntropyLabelSmooth`` loss on
    the ``'target'`` loader, optionally combined with entropy terms
    (``args.ent`` / ``args.gent``). After each epoch the accuracy from
    ``cal_acc`` is written to ``args.out_file`` and printed.

    Args:
        args: namespace read for ``lr``, ``max_epoch``, ``class_num``,
            ``cls_par``, ``ent``, ``gent``, ``ent_par``, ``epsilon``, ``dset``
            and ``out_file``; also forwarded to ``data_load`` and
            ``obtain_label``.

    Returns:
        The trained model, left in eval mode.
    """
    loaders = data_load(args)

    net = network.Res50().cuda()
    trainable = []
    for name, param in net.named_parameters():
        # Parameters whose name contains 'fc' are frozen; the rest are trained.
        if 'fc' in name:
            param.requires_grad = False
            continue
        trainable.append({'params': param, 'lr': args.lr})

    optimizer = optim.SGD(trainable, momentum=0.9, weight_decay=5e-4, nesterov=True)

    for epoch in tqdm(range(args.max_epoch), leave=False):

        # Refresh the pseudo-labels with the model in eval mode.
        net.eval()
        pseudo_labels = torch.from_numpy(
            obtain_label(loaders['test'], net, args)).cuda()
        net.train()

        target_batches = enumerate(iter(loaders['target']))
        for _, (batch, _, tar_idx) in tqdm(target_batches, leave=False):
            # Batches holding a single sample are skipped.
            if batch.size(0) == 1:
                continue
            batch = batch.cuda()

            pred = pseudo_labels[tar_idx]
            _, logits = net(batch)

            criterion = loss.CrossEntropyLabelSmooth(
                num_classes=args.class_num, epsilon=0)
            classifier_loss = criterion(logits, pred)
            classifier_loss *= args.cls_par

            if args.ent:
                probs = nn.Softmax(dim=1)(logits)
                entropy_loss = torch.mean(loss.Entropy(probs))
                if args.gent:
                    # Entropy of the batch-mean prediction is subtracted.
                    mean_probs = probs.mean(dim=0)
                    gentropy_loss = torch.sum(
                        -mean_probs * torch.log(mean_probs + args.epsilon))
                    entropy_loss -= gentropy_loss
                classifier_loss += entropy_loss * args.ent_par

            optimizer.zero_grad()
            classifier_loss.backward()
            optimizer.step()

        net.eval()
        acc, _ = cal_acc(loaders['test'], net)
        log_str = 'Task: {}, Iter:{}/{}; Accuracy = {:.2f}%'.format(
            args.dset, epoch + 1, args.max_epoch, acc * 100)
        args.out_file.write(log_str + '\n')
        args.out_file.flush()
        print(log_str + '\n')

    return net
Beispiel #9
0
def cal_acc(loader, netF, netB, netC, flag=False, threshold=0.1):
    """Evaluate the ``netC(netB(netF(x)))`` pipeline on every batch of ``loader``.

    Reads the module-level ``args`` (``epsilon``, ``class_num``) when ``flag``
    is true.

    Args:
        loader: iterable of batches; item 0 of a batch is the input tensor and
            item 1 the label tensor.
        netF, netB, netC: callables chained as ``netC(netB(netF(inputs)))``.
        flag: when true, split the samples into two clusters by normalised
            prediction entropy and relabel the higher-entropy cluster as class
            ``args.class_num`` before computing per-class accuracies.
        threshold: accepted for signature compatibility; not used.

    Returns:
        ``(accuracy, mean_ent)`` when ``flag`` is false; ``accuracy`` is a
        fraction.
        ``(mean of all but the last per-class accuracy, mean of all per-class
        accuracies, last per-class accuracy)`` in percent when ``flag`` is
        true.
    """
    output_chunks = []
    label_chunks = []
    with torch.no_grad():
        # Iterate the loader directly: ``iterator.next()`` is a Python 2 idiom
        # that Python 3 iterators are not required to provide.
        for data in loader:
            inputs = data[0].cuda()
            labels = data[1]
            outputs = netC(netB(netF(inputs)))
            output_chunks.append(outputs.float().cpu())
            label_chunks.append(labels.float())
    # One concatenation instead of re-copying the accumulated tensor per batch.
    all_output = torch.cat(output_chunks, 0)
    all_label = torch.cat(label_chunks, 0)

    _, predict = torch.max(all_output, 1)
    accuracy = torch.sum(
        torch.squeeze(predict).float() == all_label).item() / float(
            all_label.size()[0])
    mean_ent = torch.mean(loss.Entropy(
        nn.Softmax(dim=1)(all_output))).cpu().data.item()

    if not flag:
        return accuracy, mean_ent

    all_output = nn.Softmax(dim=1)(all_output)
    # Per-sample entropy divided by log(class_num).
    ent = torch.sum(-all_output * torch.log(all_output + args.epsilon),
                    dim=1) / np.log(args.class_num)

    from sklearn.cluster import KMeans
    ent_column = ent.reshape(-1, 1)
    kmeans = KMeans(2, random_state=0).fit(ent_column)
    cluster_ids = kmeans.predict(ent_column)

    # Pick the cluster whose mean entropy exceeds the overall mean and assign
    # its samples to the extra class index ``args.class_num``.
    idx = np.where(cluster_ids == 1)[0]
    iidx = 0
    if ent[idx].mean() > ent.mean():
        iidx = 1
    predict[np.where(cluster_ids == iidx)[0]] = args.class_num

    matrix = confusion_matrix(all_label, torch.squeeze(predict).float())
    # Keep the rows indexed by the distinct label values that occur.
    matrix = matrix[np.unique(all_label).astype(int), :]

    acc = matrix.diagonal() / matrix.sum(axis=1) * 100
    unknown_acc = acc[-1:].item()
    return np.mean(acc[:-1]), np.mean(acc), unknown_acc
Beispiel #10
0
def train(dataloader_src, dataloader_tgt, discriminator, classifier,
          train_epochs, writer):
    """Jointly train ``classifier`` and ``discriminator`` with a CDAN loss.

    Args:
        dataloader_src: loader yielding ``(images, labels)`` source batches.
        dataloader_tgt: loader yielding ``(images, labels)`` target batches;
            the target labels are converted but not used in the loss.
        discriminator: module passed to ``loss.CDAN``.
        classifier: callable returning ``(features, predictions)``.
        train_epochs: number of epochs to run.
        writer: object with ``add_scalar(tag, value, step)``; written to every
            fifth epoch.
    """
    discriminator.train()
    classifier.train()

    criterion = nn.CrossEntropyLoss()
    # Reminder: momentum speeds up the step when the previous update direction
    # agrees with the current gradient direction; weight_decay is L2
    # regularization.
    optimizer = optim.SGD(
        itertools.chain(classifier.parameters(), discriminator.parameters()),
        lr=1e-3,
        momentum=0.9,
        weight_decay=0.0009,
    )
    # Pre-initialised so the logging below has values even if no batch ran.
    loss_clf_ = transfer_loss = 0
    for epoch in range(train_epochs):
        for src_batch, tgt_batch in zip(dataloader_src, dataloader_tgt):
            imgs_src, labels_src = src_batch
            imgs_tgt, labels_tgt = tgt_batch

            # Flatten every image to one row per sample.
            n_src = imgs_src.shape[0]
            imgs_src = Variable(imgs_src.type(FloatTensor)).reshape(n_src, -1)
            labels_src = Variable(labels_src.type(LongTensor))

            n_tgt = imgs_tgt.shape[0]
            imgs_tgt = Variable(imgs_tgt.type(FloatTensor)).reshape(n_tgt, -1)
            labels_tgt = Variable(labels_tgt.type(FloatTensor))

            # Forward both domains through the classifier.
            fea_src, pred_src = classifier(imgs_src)
            fea_tgt, pred_tgt = classifier(imgs_tgt)
            fea = torch.cat((fea_src, fea_tgt), 0)
            pred = torch.cat((pred_src, pred_tgt), 0)

            # Compute the class probabilities.
            softmax_out = nn.Softmax(dim=1)(pred)

            # Compute the entropy and the discriminator loss.
            entropy = loss.Entropy(softmax_out)
            coeff = networks.calc_coeff(epoch)
            transfer_loss = loss.CDAN([fea, softmax_out], discriminator,
                                      entropy, coeff)

            # Classifier loss on the source predictions.
            loss_clf_ = criterion(pred_src, labels_src)

            with OptimizerManager([optimizer]):
                total_loss = transfer_loss + loss_clf_
                total_loss.backward()

        if epoch % 5 != 0:
            continue
        acc_src, acc_tgt = evaluate(classifier, dataloader_src,
                                    dataloader_tgt)
        writer.add_scalar('Train/loss_c_src', loss_clf_, epoch)
        writer.add_scalar('Train/transfer_loss', transfer_loss, epoch)
        writer.add_scalar('Evaluate/Acc_src', acc_src, epoch)
        writer.add_scalar('Evaluate/Acc_tgt', acc_tgt, epoch)
Beispiel #11
0
def train(args, model, ad_net, random_layer, train_loader, train_loader1,
          optimizer, optimizer_ad, epoch, start_epoch, method):
    """Train ``model`` for one epoch on paired source/target batches.

    Args:
        args: namespace read for ``mdd_weight``, ``entropic_weight``,
            ``log_interval`` and ``batch_size``.
        model: callable returning ``(feature, output)`` for a batch.
        ad_net, random_layer: forwarded to ``loss_func.CDAN``.
        train_loader: loader yielding ``(data, label)`` source batches.
        train_loader1: loader yielding ``(data, label)`` target batches; the
            target labels are ignored.
        optimizer: optimizer stepped on every iteration.
        optimizer_ad: optimizer stepped only when ``epoch > start_epoch``.
        epoch: current epoch index.
        start_epoch: the CDAN term is only added for ``epoch > start_epoch``.
        method: accepted for signature compatibility; not used.
    """
    model.train()
    len_source = len(train_loader)
    len_target = len(train_loader1)
    # An epoch spans the longer loader; the shorter one is restarted whenever
    # it has been consumed.
    num_iter = max(len_source, len_target)

    for batch_idx in range(num_iter):
        if batch_idx % len_source == 0:
            iter_source = iter(train_loader)
        if batch_idx % len_target == 0:
            iter_target = iter(train_loader1)
        # Built-in ``next()`` replaces the Python 2 style ``iterator.next()``.
        data_source, label_source = next(iter_source)
        data_source, label_source = data_source.cuda(), label_source.cuda()
        data_target, _ = next(iter_target)
        data_target = data_target.cuda()
        optimizer.zero_grad()
        optimizer_ad.zero_grad()
        feature_source, output_source = model(data_source)
        feature_target, output_target = model(data_target)
        feature = torch.cat((feature_source, feature_target), 0)
        output = torch.cat((output_source, output_target), 0)

        # Target samples get the model's own argmax predictions as labels.
        labels_target_fake = torch.max(nn.Softmax(dim=1)(output_target), 1)[1]
        labels = torch.cat((label_source, labels_target_fake))

        # Supervised loss on the source part of the concatenated outputs only.
        loss = nn.CrossEntropyLoss()(output.narrow(0, 0, data_source.size(0)),
                                     label_source)
        softmax_output = nn.Softmax(dim=1)(output)
        if epoch > start_epoch:
            entropy = loss_func.Entropy(softmax_output)
            loss += loss_func.CDAN(
                [feature, softmax_output], ad_net, entropy,
                network.calc_coeff(num_iter * (epoch - start_epoch) +
                                   batch_idx), random_layer)

        loss = loss + args.mdd_weight * loss_func.mdd_digit(
            feature, labels
        ) + args.entropic_weight * loss_func.EntropicConfusion(feature)

        loss.backward()
        optimizer.step()
        if epoch > start_epoch:
            optimizer_ad.step()
        if (batch_idx + epoch * num_iter) % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.4f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * args.batch_size, num_iter * args.batch_size,
                100. * batch_idx / num_iter, loss.item()))
Beispiel #12
0
def train(args, model, ad_net, random_layer, train_loader, train_loader1,
          optimizer, optimizer_ad, epoch, start_epoch, method):
    """Train ``model`` for one epoch with a selectable adversarial loss.

    Args:
        args: namespace read for ``log_interval`` and ``batch_size``.
        model: callable returning ``(feature, output)`` for a batch; it is
            called once on the concatenated source and target data.
        ad_net, random_layer: forwarded to the adversarial loss functions.
        train_loader: loader yielding ``(data, label)`` source batches.
        train_loader1: loader yielding ``(data, label)`` target batches; the
            target labels are ignored.
        optimizer: optimizer stepped on every iteration.
        optimizer_ad: optimizer stepped only when ``epoch > start_epoch``.
        epoch: current epoch index.
        start_epoch: the adversarial term is only added for
            ``epoch > start_epoch``.
        method: one of ``'CDAN-E'``, ``'CDAN'`` or ``'DANN'``.

    Raises:
        ValueError: if ``method`` is not recognised (only checked once
            ``epoch > start_epoch``).
    """
    model.train()
    len_source = len(train_loader)
    len_target = len(train_loader1)
    # An epoch spans the longer loader; the shorter one is restarted whenever
    # it has been consumed.
    num_iter = max(len_source, len_target)

    for batch_idx in range(num_iter):
        if batch_idx % len_source == 0:
            iter_source = iter(train_loader)
        if batch_idx % len_target == 0:
            iter_target = iter(train_loader1)
        # Built-in ``next()`` replaces the Python 2 style ``iterator.next()``.
        data_source, label_source = next(iter_source)
        data_source, label_source = data_source.cuda(), label_source.cuda()
        data_target, _ = next(iter_target)
        data_target = data_target.cuda()
        print('data_source:', data_source.shape, data_target.shape)

        optimizer.zero_grad()
        optimizer_ad.zero_grad()
        feature, output = model(torch.cat((data_source, data_target), 0))
        # Supervised loss on the source part of the joint outputs only.
        loss = nn.CrossEntropyLoss()(output.narrow(0, 0, data_source.size(0)),
                                     label_source)
        softmax_output = nn.Softmax(dim=1)(output)
        if epoch > start_epoch:
            if method == 'CDAN-E':
                entropy = loss_func.Entropy(softmax_output)
                loss += loss_func.CDAN(
                    [feature, softmax_output], ad_net, entropy,
                    network.calc_coeff(num_iter * (epoch - start_epoch) +
                                       batch_idx), random_layer)
            elif method == 'CDAN':
                loss += loss_func.CDAN([feature, softmax_output], ad_net, None,
                                       None, random_layer)
            elif method == 'DANN':
                loss += loss_func.DANN(feature, ad_net)
            else:
                raise ValueError('Method cannot be recognized.')
        loss.backward()
        optimizer.step()
        if epoch > start_epoch:
            optimizer_ad.step()
        if (batch_idx + epoch * num_iter) % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * args.batch_size, num_iter * args.batch_size,
                100. * batch_idx / num_iter, loss.item()))
Beispiel #13
0
def cal_acc(loader, netF, netB, netC, flag=False):
    """Evaluate the ``netC(netB(netF(x)))`` pipeline on every batch of ``loader``.

    Reads the module-level ``args`` (``epsilon``, ``class_num``) when ``flag``
    is true.

    Args:
        loader: iterable of batches; item 0 of a batch is the input tensor and
            item 1 the label tensor.
        netF, netB, netC: callables chained as ``netC(netB(netF(inputs)))``.
        flag: when true, relabel samples whose normalised entropy exceeds a
            two-cluster KMeans threshold as class ``args.class_num`` and
            return per-class accuracies.

    Returns:
        ``(accuracy, mean_ent)`` when ``flag`` is false; ``accuracy`` is a
        fraction.
        ``(mean of all per-class accuracies, mean of all but the last)`` as
        fractions when ``flag`` is true.
    """
    output_chunks = []
    label_chunks = []
    with torch.no_grad():
        # Iterate the loader directly: ``iterator.next()`` is a Python 2 idiom
        # that Python 3 iterators are not required to provide.
        for data in loader:
            inputs = data[0].cuda()
            labels = data[1]
            outputs = netC(netB(netF(inputs)))
            output_chunks.append(outputs.float().cpu())
            label_chunks.append(labels.float())
    # One concatenation instead of re-copying the accumulated tensor per batch.
    all_label = torch.cat(label_chunks, 0)

    all_output = nn.Softmax(dim=1)(torch.cat(output_chunks, 0))
    _, predict = torch.max(all_output, 1)
    accuracy = torch.sum(
        torch.squeeze(predict).float() == all_label).item() / float(
            all_label.size()[0])
    mean_ent = torch.mean(loss.Entropy(all_output)).cpu().data.item()

    if not flag:
        return accuracy, mean_ent

    # NOTE(review): ``all_output`` already holds softmax probabilities here,
    # so this applies softmax a second time. Kept as-is because it changes the
    # entropy threshold below; confirm whether it is intended.
    all_output = nn.Softmax(dim=1)(all_output)
    ent = torch.sum(-all_output * torch.log(all_output + args.epsilon),
                    dim=1) / np.log(args.class_num)
    ent = ent.float().cpu()
    # Two clusters seeded at 0 and 1; the threshold is the midpoint of the
    # fitted cluster centres.
    initc = np.array([[0], [1]])
    kmeans = KMeans(n_clusters=2, random_state=0, init=initc,
                    n_init=1).fit(ent.reshape(-1, 1))
    threshold = (kmeans.cluster_centers_).mean()

    predict[ent > threshold] = args.class_num

    matrix = confusion_matrix(all_label, torch.squeeze(predict).float())
    # Keep the rows indexed by the distinct label values that occur.
    matrix = matrix[np.unique(all_label).astype(int), :]

    acc = matrix.diagonal() / matrix.sum(axis=1)
    return np.mean(acc), np.mean(acc[:-1])
Beispiel #14
0
def cal_acc(loader, netF, netB, netC):
    """Evaluate the pipeline and dump every input image sorted by label/prediction.

    Each input is saved as
    ``<args.output_dir>/inspect/label-<label>/pred-<prediction>/<k>.jpg`` where
    ``k`` is a running counter over all samples. Reads the module-level
    ``args.output_dir``.

    Args:
        loader: iterable of batches; item 0 of a batch is the input tensor and
            item 1 the label tensor.
        netF, netB, netC: callables chained as ``netC(netB(netF(inputs)))``.

    Returns:
        ``(accuracy * 100, mean_ent)`` where ``mean_ent`` is the mean of
        ``loss.Entropy`` over the softmax outputs.
    """
    k = 0
    output_chunks = []
    label_chunks = []
    with torch.no_grad():
        # Iterate the loader directly: ``iterator.next()`` is a Python 2 idiom
        # that Python 3 iterators are not required to provide.
        for data in loader:
            inputs = data[0]
            labels = data[1]
            # Build the images from a copy so the network input is untouched.
            inputs_clone = inputs.clone()
            input_images = []
            for j in range(inputs_clone.size(0)):
                x = transforms.Normalize((-1, ), (2, ))(inputs_clone[j])
                input_images.append(transforms.ToPILImage()(x))
            outputs = netC(netB(netF(inputs)))

            _, predict = torch.max(outputs.float().cpu(), 1)
            for j in range(inputs.size(0)):
                folder = args.output_dir + '/inspect/label-{}'.format(
                    labels[j])
                subfolder = folder + '/pred-{}'.format(predict[j])
                # Creates ``folder`` as well and tolerates existing
                # directories, without a separate existence check.
                os.makedirs(subfolder, exist_ok=True)
                input_images[j].save(subfolder + '/{}.jpg'.format(k))
                k += 1

            output_chunks.append(outputs.float().cpu())
            label_chunks.append(labels.float())
    # One concatenation instead of re-copying the accumulated tensor per batch.
    all_output = torch.cat(output_chunks, 0)
    all_label = torch.cat(label_chunks, 0)

    _, predict = torch.max(all_output, 1)
    accuracy = torch.sum(
        torch.squeeze(predict).float() == all_label).item() / float(
            all_label.size()[0])
    mean_ent = torch.mean(loss.Entropy(
        nn.Softmax(dim=1)(all_output))).cpu().data.item()
    return accuracy * 100, mean_ent
Beispiel #15
0
def cal_acc_multi(loader, netF_list, netB_list, netC_list, netG_list, args):
    """Evaluate a weighted combination of several source pipelines.

    For each index ``i`` in ``range(len(args.src))`` the features
    ``netB_list[i](netF_list[i](inputs))`` are fed to ``netC_list[i]`` (class
    scores) and ``netG_list[i]`` (one weight per sample). The weights are
    normalised to sum to one per sample and used to mix the class scores.

    Args:
        loader: iterable of batches; item 0 of a batch is the input tensor and
            item 1 the label tensor.
        netF_list, netB_list, netC_list, netG_list: per-source callables.
        args: namespace read for ``src`` and ``class_num``.

    Returns:
        ``(accuracy * 100, mean_ent)`` where ``mean_ent`` is the mean of
        ``loss.Entropy`` over the softmax of the mixed outputs.
    """
    num_sources = len(args.src)
    output_chunks = []
    label_chunks = []
    with torch.no_grad():
        # Iterate the loader directly: ``iterator.next()`` is a Python 2 idiom
        # that Python 3 iterators are not required to provide.
        for data in loader:
            inputs = data[0].cuda()
            labels = data[1]
            batch_size = inputs.shape[0]
            outputs_all = torch.zeros(num_sources, batch_size, args.class_num)
            weights_all = torch.ones(batch_size, num_sources)
            outputs_all_w = torch.zeros(batch_size, args.class_num)

            for i in range(num_sources):
                features = netB_list[i](netF_list[i](inputs))
                outputs = netC_list[i](features)
                weights = netG_list[i](features)
                outputs_all[i] = outputs
                weights_all[:, i] = weights.squeeze()

            # Normalise the weights per sample; the small constant guards
            # against division by zero.
            z = torch.sum(weights_all, dim=1)
            z = z + 1e-16

            weights_all = torch.transpose(torch.transpose(weights_all, 0, 1) / z, 0, 1)
            print(weights_all.mean(dim=0))
            outputs_all = torch.transpose(outputs_all, 0, 1)

            # Mix the per-source class scores of each sample with its weights.
            for i in range(batch_size):
                outputs_all_w[i] = torch.matmul(torch.transpose(outputs_all[i], 0, 1), weights_all[i])

            output_chunks.append(outputs_all_w.float().cpu())
            label_chunks.append(labels.float())
    # One concatenation instead of re-copying the accumulated tensor per batch.
    all_output = torch.cat(output_chunks, 0)
    all_label = torch.cat(label_chunks, 0)

    _, predict = torch.max(all_output, 1)
    accuracy = torch.sum(torch.squeeze(predict).float() == all_label).item() / float(all_label.size()[0])
    mean_ent = torch.mean(loss.Entropy(nn.Softmax(dim=1)(all_output))).cpu().data.item()
    return accuracy * 100, mean_ent
Beispiel #16
0
def cal_acc(loader, netF, netH, netB, netC, args, flag=False):
    """Evaluate the ``netC(netB(netF(x)), labels_or_None)`` pipeline on ``loader``.

    Args:
        loader: iterable of batches; item 0 of a batch is the input tensor and
            the last item the label tensor.
        netF, netB: callables chained as ``netB(netF(inputs))``.
        netH: accepted for signature compatibility; not used.
        netC: classifier called with the features and, when ``args.layer`` is
            one of ``'add_margin'``, ``'arc_margin'`` or ``'sphere'``, the
            labels (otherwise ``None``).
        args: namespace read for ``layer``.
        flag: when true, return confusion-matrix based results instead of the
            overall accuracy and mean entropy.

    Returns:
        ``(accuracy * 100, mean_ent)`` when ``flag`` is false.
        ``(aacc, acc)`` when ``flag`` is true: the mean per-class accuracy in
        percent, and the per-class accuracies rounded to two decimals and
        joined by spaces into one string.
    """
    needs_labels = args.layer in ['add_margin', 'arc_margin', 'sphere']
    output_chunks = []
    label_chunks = []
    with torch.no_grad():
        # Iterate the loader directly: ``iterator.next()`` is a Python 2 idiom
        # that Python 3 iterators are not required to provide.
        for data in loader:
            inputs = data[0].cuda()
            labels = data[-1].cuda()
            labels_forward = labels if needs_labels else None
            outputs = netC(netB(netF(inputs)), labels_forward)
            output_chunks.append(outputs.float().cpu())
            label_chunks.append(labels.float().cpu())
    # One concatenation instead of re-copying the accumulated tensor per batch.
    all_label = torch.cat(label_chunks, 0)

    all_output = nn.Softmax(dim=1)(torch.cat(output_chunks, 0))
    _, predict = torch.max(all_output, 1)
    accuracy = torch.sum(
        torch.squeeze(predict).float() == all_label).item() / float(
            all_label.size()[0])
    mean_ent = torch.mean(loss.Entropy(all_output)).cpu().data.item()

    if flag:
        matrix = confusion_matrix(all_label, torch.squeeze(predict).float())
        acc = matrix.diagonal() / matrix.sum(axis=1) * 100
        aacc = acc.mean()
        acc = ' '.join(str(np.round(value, 2)) for value in acc)
        return aacc, acc
    return accuracy * 100, mean_ent
Beispiel #17
0
def maxent_step(inputs, netF, netH, netB, netC, optim, epsilon=1e-8):
    """Take one optimizer step on the negated entropy of the mean prediction.

    Args:
        inputs: batch passed through ``netC(netB(netF(inputs)), None)``.
        netF, netH, netB: modules switched to training mode; ``netH`` is not
            otherwise used.
        netC: classifier called with the features and ``None``.
        optim: optimizer whose gradients are cleared before, and which is
            stepped after, the backward pass.
        epsilon: constant added inside the logarithm.

    Returns:
        ``(entropy, gentropy)`` as Python floats: the mean of
        ``loss.Entropy`` over the per-sample softmax outputs (computed without
        gradients) and the optimised quantity.
    """
    for module in (netF, netH, netB):
        module.train()

    optim.zero_grad()

    logits = netC(netB(netF(inputs)), None)
    probs = nn.Softmax(dim=1)(logits)

    # Softmax output averaged over the batch dimension.
    mean_probs = probs.mean(dim=0)

    # Sign is flipped twice: the summed term is the entropy of the mean
    # prediction, and ``gentropy`` is its negative.
    entropy_terms = -mean_probs * torch.log(mean_probs + epsilon)
    gentropy = -torch.sum(entropy_terms)

    with torch.no_grad():
        entropy = torch.mean(loss.Entropy(probs))

    gentropy.backward()
    optim.step()

    return entropy.item(), gentropy.item()
Beispiel #18
0
def cal_acc(loader, netF, netB, netC, visda_flag=False):
    """Evaluate the ``netC(netB(netF(x)))`` pipeline on every batch of ``loader``.

    Args:
        loader: iterable of batches; item 0 of a batch is the input tensor and
            item 1 the label tensor.
        netF, netB, netC: callables chained as ``netC(netB(netF(inputs)))``.
        visda_flag: selects which triple is returned (see Returns).

    Returns:
        ``(accuracy, acc_each_cls, per_cls_avg_acc)`` when ``visda_flag`` is
        true, where ``acc_each_cls`` is a space-separated string of the
        per-class accuracies rounded to two decimals.
        ``(accuracy, per_cls_avg_acc, mean_ent)`` otherwise.
        Accuracies are in percent.
    """
    output_chunks = []
    label_chunks = []
    with torch.no_grad():
        # Iterate the loader directly: ``iterator.next()`` is a Python 2 idiom
        # that Python 3 iterators are not required to provide.
        for data in loader:
            inputs = data[0].cuda()
            labels = data[1]
            outputs = netC(netB(netF(inputs)))
            output_chunks.append(outputs.float().cpu())
            label_chunks.append(labels.float())
    # One concatenation instead of re-copying the accumulated tensor per batch.
    all_output = torch.cat(output_chunks, 0)
    all_label = torch.cat(label_chunks, 0)

    _, predict = torch.max(all_output, 1)
    accuracy = torch.sum(torch.squeeze(predict).float() == all_label).item() / float(all_label.size()[0])
    accuracy *= 100
    mean_ent = torch.mean(loss.Entropy(nn.Softmax(dim=1)(all_output))).cpu().data.item()

    matrix = confusion_matrix(all_label, torch.squeeze(predict).float())
    per_cls_acc_vec = matrix.diagonal() / matrix.sum(axis=1) * 100
    per_cls_avg_acc = per_cls_acc_vec.mean()    # per-class avg acc
    acc_each_cls = ' '.join(str(np.round(value, 2)) for value in per_cls_acc_vec)

    if visda_flag:
        # For VisDA, return acc of each cls to be printed
        # overall acc, acc of each cls: str, per-class avg acc
        return accuracy, acc_each_cls, per_cls_avg_acc

    # For other datasets, need not return acc of each cls
    # overall acc, per-class avg acc, mean-ent
    return accuracy, per_cls_avg_acc, mean_ent
def train(args):
    """Train netF/netB/netC on the ``"train"`` loader and save the result.

    The loss is the sum of the terms enabled on ``args``:
      * ``args.ent``  - mean of ``loss.Entropy`` over the softmax outputs;
      * ``args.gent`` - negated entropy of the batch-mean softmax;
      * ``args.sent`` - value of ``compute_aug_loss`` on the strong inputs.

    Side effects: writes ``F.pt``, ``B.pt``, ``C.pt`` (state dicts of the
    networks after the last iteration) and ``loss.png`` (the four recorded
    loss curves) into ``args.output_dir``.

    Args:
        args: namespace providing ``dset``, ``classifier``, ``layer``,
            ``bottleneck``, ``class_num``, ``lr``, ``max_epoch``, ``ent``,
            ``gent``, ``sent`` and ``output_dir``.

    Returns:
        The tuple ``(netF, netB, netC)``.

    Raises:
        ValueError: if ``args.dset`` is not one of ``'u'``, ``'m'``, ``'s'``.
    """
    ent_loss_record = []
    gent_loss_record = []
    sent_loss_record = []
    total_loss_record = []

    dset_loaders = digit_load(args)
    ## set base network
    # The networks are left on the CPU here (the .cuda() calls are disabled).
    if args.dset in ('u', 'm'):
        netF = network.LeNetBase()  #.cuda()
    elif args.dset == 's':
        netF = network.DTNBase()  #.cuda()
    else:
        raise ValueError("unsupported args.dset: {!r}".format(args.dset))

    netB = network.feat_bootleneck(type=args.classifier,
                                   feature_dim=netF.in_features,
                                   bottleneck_dim=args.bottleneck)  #.cuda()
    netC = network.feat_classifier(type=args.layer,
                                   class_num=args.class_num,
                                   bottleneck_dim=args.bottleneck)  #.cuda()

    # One parameter group per tensor, all sharing the same learning rate.
    param_group = []
    learning_rate = args.lr
    for net in (netF, netB, netC):
        for _, v in net.named_parameters():
            param_group += [{'params': v, 'lr': learning_rate}]

    optimizer = optim.SGD(param_group)
    optimizer = op_copy(optimizer)

    train_loader = dset_loaders["train"]
    max_iter = args.max_epoch * len(train_loader)
    # Guard against a zero interval (max_iter < 10), which would make the
    # modulo below raise ZeroDivisionError.
    interval_iter = max(1, max_iter // 10)
    iter_num = 0

    netF.train()
    netB.train()
    netC.train()

    iter_source = iter(train_loader)
    while iter_num < max_iter:
        try:
            inputs_source, strong_inputs, target = next(iter_source)
        except StopIteration:
            # Loader exhausted: start the next pass over the data.
            iter_source = iter(train_loader)
            inputs_source, strong_inputs, target = next(iter_source)

        # Skip batches with a single sample (does not count as an iteration).
        if inputs_source.size(0) == 1:
            continue

        iter_num += 1
        lr_scheduler(optimizer, iter_num=iter_num, max_iter=max_iter)

        outputs_source = netC(netB(netF(inputs_source)))

        total_loss = torch.tensor(0.0)  #.cuda()
        softmax_out = nn.Softmax(dim=1)(outputs_source)
        if args.ent:
            ent_loss = torch.mean(loss.Entropy(softmax_out))
            total_loss += ent_loss
            ent_loss_record.append(ent_loss.detach().cpu())

        if args.gent:
            msoftmax = softmax_out.mean(dim=0)
            gent_loss = -torch.sum(-msoftmax * torch.log(msoftmax + 1e-5))
            gent_loss_record.append(gent_loss.detach().cpu())
            total_loss += gent_loss

        if args.sent:
            sent_loss = compute_aug_loss(strong_inputs, target, netC, netB,
                                         netF)
            total_loss += sent_loss
            sent_loss_record.append(sent_loss.detach().cpu())

        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()
        total_loss_record.append(total_loss.detach().cpu())

        # Progress report only; no periodic evaluation or best-checkpoint
        # selection is performed, the final weights are saved below.
        if iter_num % interval_iter == 0 or iter_num == max_iter:
            print(iter_num, interval_iter, max_iter)

    best_netF = netF.state_dict()
    best_netB = netB.state_dict()
    best_netC = netC.state_dict()

    torch.save(best_netF, osp.join(args.output_dir, "F.pt"))
    torch.save(best_netB, osp.join(args.output_dir, "B.pt"))
    torch.save(best_netC, osp.join(args.output_dir, "C.pt"))

    fig, (ax1, ax2, ax3, ax4) = plt.subplots(nrows=4,
                                             sharex=True,
                                             figsize=(16, 8))
    ax1.plot(list(range(len(ent_loss_record))), ent_loss_record, 'r')
    ax2.plot(list(range(len(gent_loss_record))), gent_loss_record, 'g')
    ax3.plot(list(range(len(sent_loss_record))), sent_loss_record, 'b')
    ax4.plot(list(range(len(total_loss_record))), total_loss_record, 'm')
    plt.tight_layout()
    plt.savefig(args.output_dir + '/loss.png')
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

    return netF, netB, netC
Beispiel #20
0
def train(config):
    """Run the adversarial domain-adaptation training loop described by ``config``.

    Builds source/target/test loaders, the base network, a separate
    ``test_ad_net`` domain discriminator (used for the A-distance log) and,
    depending on ``config['method']``, the adversarial networks of the chosen
    method. Every ``config["test_interval"]`` iterations the test accuracy is
    written to ``config["out_file"]`` and the A-distance to
    ``config['A_distance_file']``.

    Args:
        config: dict; the keys read here are ``"prep"``, ``"data"``,
            ``"network"``, ``"method"``, ``"loss"``, ``"center_threshold"``
            (method ``'proposed'`` only), ``"optimizer"``, ``"gpu"``,
            ``"num_iterations"``, ``"test_interval"``, ``"out_file"``,
            ``"A_distance_file"``, ``"tsne"``, ``"tsne_num"`` and
            ``"output_path"``.

    Returns:
        The highest accuracy returned by ``image_classification_test`` during
        training (0.0 if no test iteration was reached).

    Raises:
        ValueError: if ``config['method']`` is not one of the handled methods.
    """
    # set pre-process
    prep_config = config["prep"]
    prep_dict = {}
    prep_dict["source"] = prep.image_train(**config["prep"]['params'])
    prep_dict["target"] = prep.image_train(**config["prep"]['params'])
    if prep_config["test_10crop"]:
        prep_dict["test"] = prep.image_test_10crop(**config["prep"]['params'])
    else:
        prep_dict["test"] = prep.image_test(**config["prep"]['params'])

    # prepare data
    dsets = {}
    dset_loaders = {}
    data_config = config["data"]
    train_bs = data_config["source"]["batch_size"]
    test_bs = data_config["test"]["batch_size"]
    dsets["source"] = datasets.ImageFolder(data_config['source']['list_path'], transform=prep_dict["source"])
    dset_loaders['source'] = getdataloader(dsets['source'], batchsize=train_bs, num_workers=4, drop_last=True, weightsampler=True)
    dsets["target"] = datasets.ImageFolder(data_config['target']['list_path'], transform=prep_dict["target"])
    dset_loaders["target"] = DataLoader(dsets["target"], batch_size=train_bs,
                                        shuffle=True, num_workers=4, drop_last=True)

    if prep_config["test_10crop"]:
        # One dataset/loader per crop transform. (Built once; the previous
        # outer loop rebuilt the same ten loaders ten times.)
        dsets["test"] = [datasets.ImageFolder(data_config['test']['list_path'],
                                              transform=prep_dict["test"][i]) for i in range(10)]
        dset_loaders["test"] = [DataLoader(dset, batch_size=test_bs,
                                           shuffle=False, num_workers=4) for dset in dsets['test']]
    else:
        dsets["test"] = datasets.ImageFolder(data_config['test']['list_path'],
                                             transform=prep_dict["test"])
        dset_loaders["test"] = DataLoader(dsets["test"], batch_size=test_bs,
                                          shuffle=False, num_workers=4)

    class_num = config["network"]["params"]["class_num"]

    # set base network
    net_config = config["network"]
    base_network = net_config["name"](**net_config["params"])
    base_network = base_network.cuda()

    # set test_ad_net
    test_ad_net = network.AdversarialNetwork(base_network.output_num(), 1024, test_ad_net=True)
    test_ad_net = test_ad_net.cuda()

    # add additional network for some methods
    if config['method'] == 'DANN':
        random_layer = None
        ad_net = network.AdversarialNetwork(base_network.output_num(), 1024)
    elif config['method'] == 'MADA':
        random_layer = None
        ad_net = network.AdversarialNetworkClassGroup(base_network.output_num(), 1024, class_num)
    elif config['method'] == 'proposed':
        if config['loss']['random']:
            random_layer = network.RandomLayer([base_network.output_num(), class_num], config['loss']['random_dim'])
            ad_net = network.AdversarialNetwork(config['loss']['random_dim'], 1024)
            ad_net_group = network.AdversarialNetworkGroup(config['loss']['random_dim'], 256, class_num, config['center_threshold'])
        else:
            random_layer = None
            ad_net = network.AdversarialNetwork(base_network.output_num(), 1024)
            ad_net_group = network.AdversarialNetworkGroup(base_network.output_num(), 1024, class_num, config['center_threshold'])
    elif config['method'] == 'base':
        # No adversarial network: ad_net / random_layer stay undefined and
        # every later use is guarded by a method check.
        pass
    else:
        if config["loss"]["random"]:
            random_layer = network.RandomLayer([base_network.output_num(), class_num], config["loss"]["random_dim"])
            ad_net = network.AdversarialNetwork(config["loss"]["random_dim"], 1024)
        else:
            random_layer = None
            ad_net = network.AdversarialNetwork(base_network.output_num() * class_num, 1024)
    if config["loss"]["random"] and config['method'] != 'base' and config['method'] != 'DANN' and config['method'] != 'MADA':
        random_layer.cuda()
    if config['method'] != 'base':
        ad_net = ad_net.cuda()
    if config['method'] == 'proposed':
        ad_net_group = ad_net_group.cuda()

    # set parameters
    if config['method'] == 'proposed':
        parameter_list = base_network.get_parameters() + test_ad_net.get_parameters() + ad_net.get_parameters() + ad_net_group.get_parameters()
    elif config['method'] == 'base':
        parameter_list = base_network.get_parameters() + test_ad_net.get_parameters()
    elif config['method'] == 'MADA':
        parameter_list = base_network.get_parameters() + test_ad_net.get_parameters() + ad_net.get_parameters()
    else:
        parameter_list = base_network.get_parameters() + test_ad_net.get_parameters() + ad_net.get_parameters()

    # set optimizer
    optimizer_config = config["optimizer"]
    optimizer = optimizer_config["type"](parameter_list, **(optimizer_config["optim_params"]))
    param_lr = []
    for param_group in optimizer.param_groups:
        param_lr.append(param_group["lr"])
    schedule_param = optimizer_config["lr_param"]
    lr_scheduler = lr_schedule.schedule_dict[optimizer_config["lr_type"]]

    # parallel
    gpus = config['gpu'].split(',')
    if len(gpus) > 1:
        base_network = nn.DataParallel(base_network)
        test_ad_net = nn.DataParallel(test_ad_net)
        if config['method'] == 'DANN':
            ad_net = nn.DataParallel(ad_net)
        elif config['method'] == 'proposed':
            if config['loss']['random']:
                random_layer = nn.DataParallel(random_layer)
                ad_net = nn.DataParallel(ad_net)
                # Wrapping ad_net_group in DataParallel raises an error,
                # possibly because its output is not a tensor, which
                # DataParallel does not support yet.
                #ad_net_group = nn.DataParallel(ad_net_group)
            else:
                ad_net = nn.DataParallel(ad_net)
                #ad_net_group = nn.DataParallel(ad_net_group)
        elif config['method'] == 'base':
            pass
        else:
            # CDAN+E
            if config["loss"]["random"]:
                random_layer = nn.DataParallel(random_layer)
                ad_net = nn.DataParallel(ad_net)
            # CDAN
            else:
                ad_net = nn.DataParallel(ad_net)

    ## train
    len_train_source = len(dset_loaders["source"])
    len_train_target = len(dset_loaders["target"])
    best_acc = 0.0
    # First iteration of the short window whose features are collected for
    # the t-SNE plots (loop-invariant, so computed once).
    tsne_start = int(0.98*config['num_iterations'])
    for i in range(config["num_iterations"]):
        is_test_iter = i % config["test_interval"] == config["test_interval"] - 1
        if is_test_iter:
            base_network.train(False)  # eval() == train(False) is True
            temp_acc = image_classification_test(dset_loaders, base_network, test_10crop=prep_config["test_10crop"])
            # NOTE(review): this wraps the live network, it is not a weight
            # snapshot; it is only referenced by the disabled save below.
            temp_model = nn.Sequential(base_network)
            if temp_acc > best_acc:
                best_acc = temp_acc
                best_model = temp_model
            log_str = "iter: {:05d}, precision: {:.5f}".format(i, temp_acc)
            config["out_file"].write(log_str + "\n")
            config["out_file"].flush()
            print(log_str)
        # if i % config["snapshot_interval"] == 0:
        #     torch.save(nn.Sequential(base_network), osp.join(config["output_path"],
        #                                                      "iter_{:05d}_model.pth.tar".format(i)))

        loss_params = config["loss"]
        # train one iter
        base_network.train(True)
        if config['method'] != 'base':
            ad_net.train(True)
        if config['method'] == 'proposed':
            ad_net_group.train(True)
        # lr_scheduler
        optimizer = lr_scheduler(optimizer, i, **schedule_param)
        optimizer.zero_grad()
        # Restart each loader's iterator whenever a full pass has been consumed.
        if i % len_train_source == 0:
            iter_source = iter(dset_loaders["source"])
        if i % len_train_target == 0:
            iter_target = iter(dset_loaders["target"])
        inputs_source, labels_source = next(iter_source)
        inputs_target, _ = next(iter_target)  # target labels are not used
        inputs_source, inputs_target, labels_source = inputs_source.cuda(), inputs_target.cuda(), labels_source.cuda()
        features_source, outputs_source = base_network(inputs_source)
        features_target, outputs_target = base_network(inputs_target)
        if config['tsne']:
            # feature visualization by using T-SNE
            if i == tsne_start:
                features_source_total = features_source.cpu().detach().numpy()
                features_target_total = features_target.cpu().detach().numpy()
            elif i > tsne_start and i < tsne_start+10:
                features_source_total = np.concatenate((features_source_total, features_source.cpu().detach().numpy()))
                features_target_total = np.concatenate((features_target_total, features_target.cpu().detach().numpy()))
            elif i == tsne_start+10:
                for index in range(config['tsne_num']):
                    features_embeded = TSNE(perplexity=10,n_iter=5000).fit_transform(np.concatenate((features_source_total, features_target_total)))
                    plt.figure()
                    # First half of the embedding is source (red), second half target (blue).
                    plt.scatter(features_embeded[:len(features_embeded)//2, 0], features_embeded[:len(features_embeded)//2, 1], c='r', s=1)
                    plt.scatter(features_embeded[len(features_embeded)//2:, 0], features_embeded[len(features_embeded)//2:, 1], c='b', s=1)
                    plt.savefig(osp.join(config["output_path"], config['method']+'-'+str(index)+'.png'))
                    plt.close()

        assert features_source.size(0) == features_target.size(0), 'The batchsize must be same'
        assert outputs_source.size(0) == outputs_target.size(0), 'The batchsize must be same'
        # source first, target second
        features = torch.cat((features_source, features_target), dim=0)
        outputs = torch.cat((outputs_source, outputs_target), dim=0)

        # output the A_distance
        if is_test_iter:
            A_distance = cal_A_distance(test_ad_net, features)
            config['A_distance_file'].write(str(A_distance)+'\n')
            config['A_distance_file'].flush()

        softmax_out = nn.Softmax(dim=1)(outputs)
        if config['method'] == 'CDAN+E':
            entropy = loss.Entropy(softmax_out)
            transfer_loss = loss.CDAN([features, softmax_out], ad_net, entropy, network.calc_coeff(i), random_layer)
        elif config['method'] == 'CDAN':
            transfer_loss = loss.CDAN([features, softmax_out], ad_net, None, None, random_layer)
        elif config['method'] == 'DANN':
            transfer_loss = loss.DANN(features, ad_net)
        elif config['method'] == 'MADA':
            transfer_loss = loss.MADA(features, softmax_out, ad_net)
        elif config['method'] == 'proposed':
            entropy = loss.Entropy(softmax_out)
            transfer_loss = loss.proposed([features, outputs], labels_source, ad_net, ad_net_group, entropy,
                                          network.calc_coeff(i), i, random_layer, config['loss']['trade_off23'])
        elif config['method'] == 'base':
            pass
        else:
            raise ValueError('Method cannot be recognized.')
        # test_ad_net is trained on detached features, so its loss does not
        # send gradients into the base network.
        test_domain_loss = loss.DANN(features.clone().detach(), test_ad_net)
        classifier_loss = nn.CrossEntropyLoss()(outputs_source, labels_source)
        if config['method'] == 'base':
            total_loss = classifier_loss + test_domain_loss
        else:
            total_loss = loss_params["trade_off"] * transfer_loss + classifier_loss + test_domain_loss
        total_loss.backward()
        optimizer.step()
    # torch.save(best_model, osp.join(config["output_path"], "best_model.pth.tar"))
    return best_acc
Beispiel #21
0
def train_target(args):
    """Adapt source-trained netF/netB on the target loader with a frozen netC.

    Source weights are loaded from ``args.output_dir_src``; netC is put in
    eval mode and its parameters are frozen. Each step mixes the sample's own
    netF features (weight 0.8) with the features returned by
    ``get_mtx_sam_wgt_nh`` (weight 0.2) and optimises a soft-label
    cross-entropy (if ``args.cls_par > 0``) plus the entropy terms enabled by
    ``args.ent`` / ``args.gent``. Accuracy is logged to ``args.out_file``
    every ``interval_iter`` iterations; weights are saved when
    ``args.issave`` is set.

    Args:
        args: namespace providing ``net``, ``classifier``, ``layer``,
            ``bottleneck``, ``class_num``, ``output_dir_src``, ``lr``,
            ``lr_decay1``, ``lr_decay2``, ``max_epoch``, ``interval``,
            ``cls_par``, ``dset``, ``ent``, ``gent``, ``epsilon``,
            ``ent_par``, ``name``, ``out_file``, ``issave``, ``output_dir``
            and ``savename``.

    Returns:
        The tuple ``(netF, netB, netC)``.

    Raises:
        ValueError: if ``args.net`` does not start with ``'res'`` or ``'vgg'``.
    """
    dset_loaders = data_load(args)
    if args.net[0:3] == 'res':
        netF = network.ResBase(res_name=args.net).cuda()
    elif args.net[0:3] == 'vgg':
        netF = network.VGGBase(vgg_name=args.net).cuda()
    else:
        raise ValueError("unsupported args.net: {!r}".format(args.net))

    netB = network.feat_bootleneck(type=args.classifier,
                                   feature_dim=netF.in_features,
                                   bottleneck_dim=args.bottleneck).cuda()
    netC = network.feat_classifier(type=args.layer,
                                   class_num=args.class_num,
                                   bottleneck_dim=args.bottleneck).cuda()

    modelpath = args.output_dir_src + '/source_F.pt'
    netF.load_state_dict(torch.load(modelpath))
    modelpath = args.output_dir_src + '/source_B.pt'
    netB.load_state_dict(torch.load(modelpath))
    modelpath = args.output_dir_src + '/source_C.pt'
    netC.load_state_dict(torch.load(modelpath))
    # The classifier head stays fixed during adaptation.
    netC.eval()
    for k, v in netC.named_parameters():
        v.requires_grad = False

    # A non-positive decay factor freezes the corresponding network.
    param_group = []
    for k, v in netF.named_parameters():
        if args.lr_decay1 > 0:
            param_group += [{'params': v, 'lr': args.lr * args.lr_decay1}]
        else:
            v.requires_grad = False
    for k, v in netB.named_parameters():
        if args.lr_decay2 > 0:
            param_group += [{'params': v, 'lr': args.lr * args.lr_decay2}]
        else:
            v.requires_grad = False

    optimizer = optim.SGD(param_group)
    optimizer = op_copy(optimizer)

    target_loader = dset_loaders["target"]
    max_iter = args.max_epoch * len(target_loader)
    # Guard against a zero interval (args.interval > max_iter), which would
    # make the modulo tests below raise ZeroDivisionError.
    interval_iter = max(1, max_iter // args.interval)
    iter_num = 0

    iter_sw = int(max_iter / 2.0)

    iter_test = iter(target_loader)
    while iter_num < max_iter:
        try:
            inputs_test, _, tar_idx = next(iter_test)
        except StopIteration:
            # Loader exhausted: start the next pass over the data.
            iter_test = iter(target_loader)
            inputs_test, _, tar_idx = next(iter_test)

        # Skip batches with a single sample (does not count as an iteration).
        if inputs_test.size(0) == 1:
            continue

        # Refresh the soft labels and the neighbourhood information. This is
        # done regardless of args.cls_par because feas_all / mtx_infor_nh are
        # needed by every training step below.
        if iter_num % interval_iter == 0:
            netF.eval()
            netB.eval()
            mem_label_soft, mtx_infor_nh, feas_FC = obtain_label(
                dset_loaders['test'], netF, netB, netC, args, iter_num,
                iter_sw)
            mem_label_soft = torch.from_numpy(mem_label_soft).cuda()
            feas_all = feas_FC[0]
            netF.train()
            netB.train()

        inputs_test = inputs_test.cuda()
        iter_num += 1
        lr_scheduler(optimizer, iter_num=iter_num, max_iter=max_iter)

        features_F_self = netF(inputs_test)
        features_F_nh = get_mtx_sam_wgt_nh(feas_all, mtx_infor_nh, tar_idx)
        features_F_nh = features_F_nh.cuda()
        features_F_mix = 0.8 * features_F_self + 0.2 * features_F_nh
        outputs_test_mix = netC(netB(features_F_mix))
        # NOTE(review): the next two results are not used by the loss. The
        # calls are kept because forward passes through netB in train mode
        # may update normalization statistics - confirm before removing.
        ops_test_self = netC(netB(features_F_self))
        outputs_test_nh = netC(netB(features_F_nh))

        if args.cls_par > 0:
            # Cross-entropy against the soft labels of this batch.
            log_probs = nn.LogSoftmax(dim=1)(outputs_test_mix)
            targets = mem_label_soft[tar_idx]
            loss_soft = (-targets * log_probs).sum(dim=1)
            classifier_loss = loss_soft.mean()

            classifier_loss *= args.cls_par
            # On VISDA-C the label loss is switched off during the first interval.
            if iter_num < interval_iter and args.dset == "VISDA-C":
                classifier_loss *= 0
        else:
            classifier_loss = torch.tensor(0.0).cuda()

        if args.ent:
            softmax_out = nn.Softmax(dim=1)(
                outputs_test_mix)  # outputs_test_mix
            entropy_loss = torch.mean(loss.Entropy(softmax_out))

            if args.gent:
                # Subtract the entropy of the batch-mean prediction.
                msoftmax = softmax_out.mean(dim=0)
                gentropy_loss = torch.sum(-msoftmax *
                                          torch.log(msoftmax + args.epsilon))
                entropy_loss -= gentropy_loss
            im_loss = entropy_loss * args.ent_par
            classifier_loss += im_loss

        optimizer.zero_grad()
        classifier_loss.backward()
        optimizer.step()

        if iter_num % interval_iter == 0 or iter_num == max_iter:
            netF.eval()
            netB.eval()
            if args.dset == 'VISDA-C':
                acc_s_te, acc_list = cal_acc(dset_loaders['test'], netF, netB,
                                             netC, True)
                log_str = 'Task: {}, Iter:{}/{}; Accuracy = {:.2f}%'.format(
                    args.name, iter_num, max_iter, acc_s_te) + '\n' + acc_list
            else:
                acc_s_te, _ = cal_acc(dset_loaders['test'], netF, netB, netC,
                                      False)
                log_str = 'Task: {}, Iter:{}/{}; Accuracy = {:.2f}%'.format(
                    args.name, iter_num, max_iter, acc_s_te)

            args.out_file.write(log_str + '\n')
            args.out_file.flush()
            print(log_str + '\n')
            netF.train()
            netB.train()

    if args.issave:
        torch.save(
            netF.state_dict(),
            osp.join(args.output_dir, "target_F_" + args.savename + ".pt"))
        torch.save(
            netB.state_dict(),
            osp.join(args.output_dir, "target_B_" + args.savename + ".pt"))
        torch.save(
            netC.state_dict(),
            osp.join(args.output_dir, "target_C_" + args.savename + ".pt"))

    return netF, netB, netC
Beispiel #22
0
def train_target(args):
    """Adapt source-trained netF/netB on the target loader with a frozen netC.

    Source weights are loaded from ``args.output_dir_src``; netC is put in
    eval mode and its parameters are frozen. The per-step loss is a
    cross-entropy against the labels from ``obtain_label`` (if
    ``args.cls_par > 0``) plus the entropy terms enabled by ``args.ent`` /
    ``args.gent``. When ``args.ssl`` is non-zero, an additional rotation
    classifier ``netR`` (initialised from ``train_target_rot``) contributes a
    rotation-prediction loss weighted by ``args.ssl``. Accuracy is logged to
    ``args.out_file`` every ``interval_iter`` iterations; weights are saved
    when ``args.issave`` is set.

    Args:
        args: namespace providing ``net``, ``classifier``, ``layer``,
            ``bottleneck``, ``class_num``, ``ssl``, ``output_dir_src``,
            ``lr``, ``lr_decay1``, ``lr_decay2``, ``max_epoch``,
            ``interval``, ``cls_par``, ``dset``, ``ent``, ``gent``,
            ``epsilon``, ``ent_par``, ``name``, ``out_file``, ``issave``,
            ``output_dir`` and ``savename``.

    Returns:
        The tuple ``(netF, netB, netC)``.

    Raises:
        ValueError: if ``args.net`` does not start with ``'res'`` or ``'vgg'``.
    """
    dset_loaders = data_load(args)
    ## set base network
    if args.net[0:3] == 'res':
        netF = network.ResBase(res_name=args.net).cuda()
    elif args.net[0:3] == 'vgg':
        netF = network.VGGBase(vgg_name=args.net).cuda()
    else:
        raise ValueError("unsupported args.net: {!r}".format(args.net))

    netB = network.feat_bootleneck(type=args.classifier, feature_dim=netF.in_features, bottleneck_dim=args.bottleneck).cuda()
    netC = network.feat_classifier(type=args.layer, class_num = args.class_num, bottleneck_dim=args.bottleneck).cuda()

    use_ssl = args.ssl != 0
    if use_ssl:
        # 4-way rotation head; its input is two bottleneck features concatenated.
        netR = network.feat_classifier(type='linear', class_num=4, bottleneck_dim=2*args.bottleneck).cuda()
        netR_dict, acc_rot = train_target_rot(args)
        netR.load_state_dict(netR_dict)

    modelpath = args.output_dir_src + '/source_F.pt'
    netF.load_state_dict(torch.load(modelpath))
    modelpath = args.output_dir_src + '/source_B.pt'
    netB.load_state_dict(torch.load(modelpath))
    modelpath = args.output_dir_src + '/source_C.pt'
    netC.load_state_dict(torch.load(modelpath))
    # The classifier head stays fixed during adaptation.
    netC.eval()
    for k, v in netC.named_parameters():
        v.requires_grad = False

    # A non-positive decay factor freezes the corresponding network.
    param_group = []
    for k, v in netF.named_parameters():
        if args.lr_decay1 > 0:
            param_group += [{'params': v, 'lr': args.lr * args.lr_decay1}]
        else:
            v.requires_grad = False
    for k, v in netB.named_parameters():
        if args.lr_decay2 > 0:
            param_group += [{'params': v, 'lr': args.lr * args.lr_decay2}]
        else:
            v.requires_grad = False
    if use_ssl:
        for k, v in netR.named_parameters():
            param_group += [{'params': v, 'lr': args.lr * args.lr_decay2}]
        netR.train()

    optimizer = optim.SGD(param_group)
    optimizer = op_copy(optimizer)

    target_loader = dset_loaders["target"]
    max_iter = args.max_epoch * len(target_loader)
    # Guard against a zero interval (args.interval > max_iter), which would
    # make the modulo tests below raise ZeroDivisionError.
    interval_iter = max(1, max_iter // args.interval)
    iter_num = 0

    iter_test = iter(target_loader)
    while iter_num < max_iter:
        optimizer.zero_grad()
        try:
            inputs_test, _, tar_idx = next(iter_test)
        except StopIteration:
            # Loader exhausted: start the next pass over the data.
            iter_test = iter(target_loader)
            inputs_test, _, tar_idx = next(iter_test)

        # Skip batches with a single sample (does not count as an iteration).
        if inputs_test.size(0) == 1:
            continue

        # Periodically recompute the labels used by the classification loss.
        if iter_num % interval_iter == 0 and args.cls_par > 0:
            netF.eval()
            netB.eval()
            mem_label = obtain_label(dset_loaders['target_te'], netF, netB, netC, args)
            mem_label = torch.from_numpy(mem_label).cuda()
            netF.train()
            netB.train()

        inputs_test = inputs_test.cuda()

        iter_num += 1
        lr_scheduler(optimizer, iter_num=iter_num, max_iter=max_iter)
        if args.cls_par > 0:
            pred = mem_label[tar_idx]

        features_test = netB(netF(inputs_test))
        outputs_test = netC(features_test)

        if args.cls_par > 0:
            classifier_loss = nn.CrossEntropyLoss()(outputs_test, pred)
            classifier_loss *= args.cls_par
            # On VISDA-C the label loss is switched off during the first interval.
            if iter_num < interval_iter and args.dset == "VISDA-C":
                classifier_loss *= 0
        else:
            classifier_loss = torch.tensor(0.0).cuda()

        if args.ent:
            softmax_out = nn.Softmax(dim=1)(outputs_test)
            entropy_loss = torch.mean(loss.Entropy(softmax_out))
            if args.gent:
                # Subtract the entropy of the batch-mean prediction.
                msoftmax = softmax_out.mean(dim=0)
                gentropy_loss = torch.sum(-msoftmax * torch.log(msoftmax + args.epsilon))
                entropy_loss -= gentropy_loss
            im_loss = entropy_loss * args.ent_par
            classifier_loss += im_loss

        classifier_loss.backward()

        if use_ssl:
            # Rotation self-supervision: gradients from this second backward
            # pass accumulate with the ones above before the optimizer step.
            r_labels_target = np.random.randint(0, 4, len(inputs_test))
            r_inputs_target = rotation.rotate_batch_with_labels(inputs_test, r_labels_target)
            r_labels_target = torch.from_numpy(r_labels_target).cuda()
            r_inputs_target = r_inputs_target.cuda()

            # Features of the unrotated batch are detached, so only the
            # rotated branch and netR receive gradients from this loss.
            f_outputs = netB(netF(inputs_test))
            f_outputs = f_outputs.detach()
            f_r_outputs = netB(netF(r_inputs_target))
            r_outputs_target = netR(torch.cat((f_outputs, f_r_outputs), 1))

            rotation_loss = args.ssl * nn.CrossEntropyLoss()(r_outputs_target, r_labels_target)
            rotation_loss.backward()

        optimizer.step()

        if iter_num % interval_iter == 0 or iter_num == max_iter:
            netF.eval()
            netB.eval()
            if args.dset=='VISDA-C':
                acc_s_te, acc_list = cal_acc(dset_loaders['test'], netF, netB, netC, True)
                log_str = 'Task: {}, Iter:{}/{}; Accuracy = {:.2f}%'.format(args.name, iter_num, max_iter, acc_s_te) + '\n' + acc_list
            else:
                acc_s_te, _ = cal_acc(dset_loaders['test'], netF, netB, netC, False)
                log_str = 'Task: {}, Iter:{}/{}; Accuracy = {:.2f}%'.format(args.name, iter_num, max_iter, acc_s_te)

            args.out_file.write(log_str + '\n')
            args.out_file.flush()
            print(log_str+'\n')
            netF.train()
            netB.train()

    if args.issave:
        torch.save(netF.state_dict(), osp.join(args.output_dir, "target_F_" + args.savename + ".pt"))
        torch.save(netB.state_dict(), osp.join(args.output_dir, "target_B_" + args.savename + ".pt"))
        torch.save(netC.state_dict(), osp.join(args.output_dir, "target_C_" + args.savename + ".pt"))

    return netF, netB, netC
Beispiel #23
0
def train_target(args):
    """Adapt source-trained netF/netB on the target loader with a frozen netC.

    Source weights are loaded from ``args.output_dir_src``; netC is put in
    eval mode and its parameters are frozen. ``obtain_label`` supplies a
    label per target sample plus an entropy threshold; only samples whose
    label is below ``args.class_num`` enter the cross-entropy and
    per-sample entropy terms, while the batch-mean entropy term
    (``args.gent``) uses all samples. Batches without any such sample are
    skipped. Accuracy is logged to ``args.out_file`` every ``interval_iter``
    iterations; weights are saved when ``args.issave`` is set.

    Args:
        args: namespace providing ``net``, ``classifier``, ``layer``,
            ``bottleneck``, ``class_num``, ``output_dir_src``, ``lr``,
            ``lr_decay1``, ``lr_decay2``, ``max_epoch``, ``interval``,
            ``cls_par``, ``ent``, ``gent``, ``epsilon``, ``ent_par``,
            ``name``, ``out_file``, ``issave``, ``output_dir`` and
            ``savename``. ``args.modelpath`` is overwritten.

    Returns:
        The tuple ``(netF, netB, netC)``.

    Raises:
        ValueError: if ``args.net`` does not start with ``'res'`` or ``'vgg'``.
    """
    dset_loaders = data_load(args)
    ## set base network
    if args.net[0:3] == 'res':
        netF = network.ResBase(res_name=args.net).cuda()
    elif args.net[0:3] == 'vgg':
        netF = network.VGGBase(vgg_name=args.net).cuda()
    else:
        raise ValueError("unsupported args.net: {!r}".format(args.net))

    netB = network.feat_bootleneck(type=args.classifier,
                                   feature_dim=netF.in_features,
                                   bottleneck_dim=args.bottleneck).cuda()
    netC = network.feat_classifier(type=args.layer,
                                   class_num=args.class_num,
                                   bottleneck_dim=args.bottleneck).cuda()

    args.modelpath = args.output_dir_src + '/source_F.pt'
    netF.load_state_dict(torch.load(args.modelpath))
    args.modelpath = args.output_dir_src + '/source_B.pt'
    netB.load_state_dict(torch.load(args.modelpath))
    args.modelpath = args.output_dir_src + '/source_C.pt'
    netC.load_state_dict(torch.load(args.modelpath))
    # The classifier head stays fixed during adaptation.
    netC.eval()
    for k, v in netC.named_parameters():
        v.requires_grad = False

    # A non-positive decay factor freezes the corresponding network.
    param_group = []
    for k, v in netF.named_parameters():
        if args.lr_decay1 > 0:
            param_group += [{'params': v, 'lr': args.lr * args.lr_decay1}]
        else:
            v.requires_grad = False
    for k, v in netB.named_parameters():
        if args.lr_decay2 > 0:
            param_group += [{'params': v, 'lr': args.lr * args.lr_decay2}]
        else:
            v.requires_grad = False

    optimizer = optim.SGD(param_group)
    optimizer = op_copy(optimizer)

    tt = 0  # number of batches skipped because no sample had a known-class label
    iter_num = 0
    target_loader = dset_loaders["target"]
    max_iter = args.max_epoch * len(target_loader)
    # Guard against a zero interval (args.interval > max_iter), which would
    # make the modulo tests below raise ZeroDivisionError.
    interval_iter = max(1, max_iter // args.interval)

    iter_test = iter(target_loader)
    while iter_num < max_iter:
        try:
            inputs_test, _, tar_idx = next(iter_test)
        except StopIteration:
            # Loader exhausted: start the next pass over the data.
            iter_test = iter(target_loader)
            inputs_test, _, tar_idx = next(iter_test)

        # Skip batches with a single sample (does not count as an iteration).
        if inputs_test.size(0) == 1:
            continue

        # Periodically recompute the labels and the entropy threshold.
        if iter_num % interval_iter == 0:
            netF.eval()
            netB.eval()
            mem_label, ENT_THRESHOLD = obtain_label(dset_loaders['test'], netF,
                                                    netB, netC, args)
            mem_label = torch.from_numpy(mem_label).cuda()
            netF.train()
            netB.train()

        inputs_test = inputs_test.cuda()

        iter_num += 1
        lr_scheduler(optimizer, iter_num=iter_num, max_iter=max_iter)

        pred = mem_label[tar_idx]
        features_test = netB(netF(inputs_test))
        outputs_test = netC(features_test)

        softmax_out = nn.Softmax(dim=1)(outputs_test)
        # Keep only samples whose label index is below args.class_num.
        outputs_test_known = outputs_test[pred < args.class_num, :]
        pred = pred[pred < args.class_num]

        if len(pred) == 0:
            # Nothing to train on in this batch; note that the iteration has
            # already been counted and the periodic log below is skipped.
            print(tt)
            del features_test
            del outputs_test
            tt += 1
            continue

        if args.cls_par > 0:
            classifier_loss = nn.CrossEntropyLoss()(outputs_test_known, pred)
            classifier_loss *= args.cls_par
        else:
            classifier_loss = torch.tensor(0.0).cuda()

        if args.ent:
            softmax_out_known = nn.Softmax(dim=1)(outputs_test_known)
            entropy_loss = torch.mean(loss.Entropy(softmax_out_known))
            if args.gent:
                # Subtract the entropy of the batch-mean prediction
                # (computed over all samples of the batch).
                msoftmax = softmax_out.mean(dim=0)
                gentropy_loss = torch.sum(-msoftmax *
                                          torch.log(msoftmax + args.epsilon))
                entropy_loss -= gentropy_loss
            classifier_loss += entropy_loss * args.ent_par

        optimizer.zero_grad()
        classifier_loss.backward()
        optimizer.step()

        if iter_num % interval_iter == 0 or iter_num == max_iter:
            netF.eval()
            netB.eval()
            acc_os1, acc_os2, acc_unknown = cal_acc(dset_loaders['test'], netF,
                                                    netB, netC, True,
                                                    ENT_THRESHOLD)
            log_str = 'Task: {}, Iter:{}/{}; Accuracy = {:.2f}% / {:.2f}% / {:.2f}%'.format(
                args.name, iter_num, max_iter, acc_os2, acc_os1, acc_unknown)
            args.out_file.write(log_str + '\n')
            args.out_file.flush()
            print(log_str + '\n')
            netF.train()
            netB.train()

    if args.issave:
        torch.save(
            netF.state_dict(),
            osp.join(args.output_dir, "target_F_" + args.savename + ".pt"))
        torch.save(
            netB.state_dict(),
            osp.join(args.output_dir, "target_B_" + args.savename + ".pt"))
        torch.save(
            netC.state_dict(),
            osp.join(args.output_dir, "target_C_" + args.savename + ".pt"))

    return netF, netB, netC
def train(args, model, ad_net, random_layer, train_loader, train_loader1, optimizer, optimizer_ad, epoch, start_epoch, method,
          D_s, D_t, G_s2t, G_t2s, criterion_Sem, criterion_GAN, criterion_cycle, criterion_identity, optimizer_G,
          optimizer_D_t, optimizer_D_s,
          classifier1, classifier1_optim, fake_S_buffer, fake_T_buffer):
    """Run one epoch of classification, domain-adversarial and cycle training.

    Each iteration draws one source and one target batch, then performs, in
    order: (1) source cross-entropy, plus an adversarial term once
    ``epoch > start_epoch``; (2) the generator loss built from identity, GAN,
    cycle and semantic terms; (3) an update of ``classifier1`` on translated
    source features; (4) a joint step of ``optimizer`` and ``optimizer_G``;
    (5) one update of each discriminator; (6) a step of ``optimizer_ad`` once
    ``epoch > start_epoch``.

    Args:
        args: Namespace providing ``cla_plus_weight``, ``weight_in_loss_g``
            (comma-separated weights for identity, GAN, cycle and semantic
            terms), ``cyc_loss_weight``, ``log_interval`` and ``batch_size``.
        model: Callable returning ``(features, outputs)``; must expose a
            ``classifier`` attribute that maps features to outputs.
        ad_net, random_layer: Forwarded to ``loss_func.CDAN`` / ``loss_func.DANN``.
        train_loader, train_loader1: Sized iterables yielding
            ``(data, label)`` pairs for source and target respectively.
        optimizer, optimizer_ad, optimizer_G, optimizer_D_t, optimizer_D_s,
            classifier1_optim: Optimizers stepped as described above.
        epoch, start_epoch: The adversarial term is active only when
            ``epoch > start_epoch``.
        method: One of ``'CDAN-E'``, ``'CDAN'`` or ``'DANN'``.
        D_s, D_t, G_s2t, G_t2s: Discriminators and feature generators.
        criterion_Sem, criterion_GAN, criterion_cycle, criterion_identity:
            Loss callables taking ``(prediction, target)``.
        classifier1: Auxiliary classifier applied to features.
        fake_S_buffer, fake_T_buffer: Objects exposing ``push_and_pop``.

    Raises:
        ValueError: If the adversarial term is active and ``method`` is not
            one of the recognised names.
    """
    model.train()
    len_source = len(train_loader)
    len_target = len(train_loader1)
    # One epoch covers the longer loader; the shorter one is restarted below.
    num_iter = max(len_source, len_target)

    # Loop-invariant: parse the four generator-loss weights once instead of
    # re-splitting the string on every iteration.
    gen_weights = [float(w) for w in args.weight_in_loss_g.split(',')[:4]]

    for batch_idx in range(num_iter):
        if batch_idx % len_source == 0:
            iter_source = iter(train_loader)
        if batch_idx % len_target == 0:
            iter_target = iter(train_loader1)
        # Use the built-in next(): iterator objects are only guaranteed to
        # implement __next__, not a Python-2 style .next() method.
        data_source, label_source = next(iter_source)
        data_target, _ = next(iter_target)
        # Batches are used as yielded by the loaders; no device transfer here.
        optimizer.zero_grad()
        optimizer_ad.zero_grad()

        features_source, outputs_source = model(data_source)
        features_target, outputs_target = model(data_target)
        features = torch.cat((features_source, features_target), dim=0)
        outputs = torch.cat((outputs_source, outputs_target), dim=0)

        # Only the leading (source) rows of the concatenated outputs are labelled.
        loss = nn.CrossEntropyLoss()(outputs.narrow(0, 0, data_source.size(0)), label_source)
        softmax_output = nn.Softmax(dim=1)(outputs)

        # Blend the model's prediction with the auxiliary classifier's.
        output1 = classifier1(features)
        softmax_output1 = nn.Softmax(dim=1)(output1)
        softmax_output = (1 - args.cla_plus_weight) * softmax_output + args.cla_plus_weight * softmax_output1

        if epoch > start_epoch:
            if method == 'CDAN-E':
                entropy = loss_func.Entropy(softmax_output)
                loss += loss_func.CDAN([features, softmax_output], ad_net, entropy, network.calc_coeff(num_iter*(epoch-start_epoch)+batch_idx), random_layer)
            elif method == 'CDAN':
                loss += loss_func.CDAN([features, softmax_output], ad_net, None, None, random_layer)
            elif method == 'DANN':
                loss += loss_func.DANN(features, ad_net)
            else:
                raise ValueError('Method cannot be recognized.')

        # ---- Generators ----
        optimizer_G.zero_grad()

        # Identity loss: a generator fed features already in its output
        # domain is compared against its own input.
        same_t = G_s2t(features_target)
        loss_identity_t = criterion_identity(same_t, features_target)

        same_s = G_t2s(features_source)
        loss_identity_s = criterion_identity(same_s, features_source)

        # GAN loss.
        # NOTE(review): the target here is the source class labels cast to
        # float rather than an all-ones "real" target - confirm this is intended.
        fake_t = G_s2t(features_source)
        pred_fake = D_t(fake_t)
        loss_G_s2t = criterion_GAN(pred_fake, label_source.float())

        fake_s = G_t2s(features_target)
        pred_fake = D_s(fake_s)
        loss_G_t2s = criterion_GAN(pred_fake, label_source.float())

        # Cycle loss: translate there and back, compare with the start.
        recovered_s = G_t2s(fake_t)
        loss_cycle_sts = criterion_cycle(recovered_s, features_source)

        recovered_t = G_s2t(fake_s)
        loss_cycle_tst = criterion_cycle(recovered_t, features_target)

        # Semantic loss: classifier outputs of recovered vs. translated features.
        pred_recovered_s = model.classifier(recovered_s)
        pred_fake_t = model.classifier(fake_t)
        loss_sem_t2s = criterion_Sem(pred_recovered_s, pred_fake_t)

        pred_recovered_t = model.classifier(recovered_t)
        pred_fake_s = model.classifier(fake_s)
        loss_sem_s2t = criterion_Sem(pred_recovered_t, pred_fake_s)

        loss_cycle = loss_cycle_tst + loss_cycle_sts
        loss_G = gen_weights[0] * (loss_identity_s + loss_identity_t) + \
                 gen_weights[1] * (loss_G_s2t + loss_G_t2s) + \
                 gen_weights[2] * loss_cycle + \
                 gen_weights[3] * (loss_sem_s2t + loss_sem_t2s)

        # ---- Auxiliary softmax classifier ----
        # fake_t is detached so this update touches classifier1 only.
        outputs_fake = classifier1(fake_t.detach())
        classifier_loss1 = nn.CrossEntropyLoss()(outputs_fake, label_source)
        classifier1_optim.zero_grad()
        classifier_loss1.backward()
        classifier1_optim.step()

        total_loss = loss + args.cyc_loss_weight * loss_G
        total_loss.backward()
        optimizer.step()
        optimizer_G.step()

        # ---- Discriminator S ----
        optimizer_D_s.zero_grad()

        # Real/fake targets are shaped after the prediction they are compared
        # with. Sizing them by the concatenated source+target batch (as was
        # done before) does not match a single-domain discriminator output.
        pred_real = D_s(features_source.detach())
        loss_D_real = criterion_GAN(pred_real, torch.ones_like(pred_real))

        fake_s = fake_S_buffer.push_and_pop(fake_s)
        pred_fake = D_s(fake_s.detach())
        loss_D_fake = criterion_GAN(pred_fake, torch.zeros_like(pred_fake))

        loss_D_s = loss_D_real + loss_D_fake
        loss_D_s.backward()
        optimizer_D_s.step()

        # ---- Discriminator T ----
        optimizer_D_t.zero_grad()

        pred_real = D_t(features_target.detach())
        loss_D_real = criterion_GAN(pred_real, torch.ones_like(pred_real))

        fake_t = fake_T_buffer.push_and_pop(fake_t)
        pred_fake = D_t(fake_t.detach())
        loss_D_fake = criterion_GAN(pred_fake, torch.zeros_like(pred_fake))

        loss_D_t = loss_D_real + loss_D_fake
        loss_D_t.backward()
        optimizer_D_t.step()

        if epoch > start_epoch:
            optimizer_ad.step()
        if (batch_idx + epoch * num_iter) % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tLoss+G: {:.6f}'.format(
                epoch, batch_idx * args.batch_size, num_iter * args.batch_size,
                       100. * batch_idx / num_iter, loss.item(), total_loss.item()))
Beispiel #25
0
def train(config):
    """Train a network with source cross-entropy plus a class-confusion penalty.

    Builds source/target/test loaders from list files, constructs the network
    and optimizer described by ``config``, then runs
    ``config["num_iterations"]`` steps. Each step minimises the source
    cross-entropy plus ``mcc_loss``, the off-diagonal mass of an
    entropy-weighted class-correlation matrix computed from
    temperature-scaled target predictions. The model is evaluated
    periodically, snapshots are written to ``config["output_path"]`` and
    scalars are logged to a summary writer.

    Args:
        config: Nested mapping. Keys read here: ``"prep"``,
            ``"tensorboard_path"``, ``"data"``, ``"network"``,
            ``"optimizer"``, ``"gpu"``, ``"num_iterations"``,
            ``"test_interval"``, ``"snapshot_interval"``, ``"temperature"``,
            ``"out_file"`` and ``"output_path"``.

    Returns:
        The highest accuracy returned by ``image_classification_test``.
    """
    import copy  # function-scope import: only needed to snapshot the best model

    def _read_list(path):
        # Read an image-list file and close the handle straight away.
        with open(path) as list_file:
            return list_file.readlines()

    ## set pre-process
    prep_config = config["prep"]
    prep_dict = {
        "source": prep.image_train(**prep_config['params']),
        "target": prep.image_train(**prep_config['params']),
    }
    if prep_config["test_10crop"]:
        prep_dict["test"] = prep.image_test_10crop(**prep_config['params'])
    else:
        prep_dict["test"] = prep.image_test(**prep_config['params'])

    tensor_writer = SummaryWriter(config["tensorboard_path"])

    ## prepare data
    dsets = {}
    dset_loaders = {}
    data_config = config["data"]
    train_bs = data_config["source"]["batch_size"]
    test_bs = data_config["test"]["batch_size"]

    for domain in ("source", "target"):
        dsets[domain] = ImageList(_read_list(data_config[domain]["list_path"]),
                                  transform=prep_dict[domain])
        dset_loaders[domain] = DataLoader(dsets[domain], batch_size=train_bs,
                                          shuffle=True, num_workers=4, drop_last=True)

    test_lines = _read_list(data_config["test"]["list_path"])
    if prep_config["test_10crop"]:
        # One dataset/loader per crop transform. These lists are built once;
        # wrapping the comprehensions in another 10-iteration loop only
        # repeated the same work.
        dsets["test"] = [ImageList(list(test_lines), transform=prep_dict["test"][i])
                         for i in range(10)]
        dset_loaders["test"] = [DataLoader(dset, batch_size=test_bs,
                                           shuffle=False, num_workers=4)
                                for dset in dsets['test']]
    else:
        dsets["test"] = ImageList(test_lines, transform=prep_dict["test"])
        dset_loaders["test"] = DataLoader(dsets["test"], batch_size=test_bs,
                                          shuffle=False, num_workers=4)

    class_num = config["network"]["params"]["class_num"]

    ## set base network
    net_config = config["network"]
    base_network = net_config["name"](**net_config["params"])
    base_network = base_network.cuda()
    parameter_list = base_network.get_parameters()

    ## set optimizer
    optimizer_config = config["optimizer"]
    optimizer = optimizer_config["type"](parameter_list,
                                         **(optimizer_config["optim_params"]))
    schedule_param = optimizer_config["lr_param"]
    lr_scheduler = lr_schedule.schedule_dict[optimizer_config["lr_type"]]

    gpus = config['gpu'].split(',')
    if len(gpus) > 1:
        base_network = nn.DataParallel(base_network, device_ids=[int(i) for i in gpus])

    ## train
    len_train_source = len(dset_loaders["source"])
    len_train_target = len(dset_loaders["target"])
    best_acc = 0.0
    best_model = None
    for i in range(config["num_iterations"]):
        if i % config["test_interval"] == config["test_interval"] - 1 or i == 0:
            base_network.train(False)
            temp_acc, _, _, _, _ = image_classification_test(
                dset_loaders, base_network, test_10crop=prep_config["test_10crop"])
            if temp_acc > best_acc:
                best_acc = temp_acc
                # nn.Sequential only wraps the live network. Deep-copy it so
                # that later training steps cannot overwrite the best weights.
                best_model = copy.deepcopy(nn.Sequential(base_network))
            log_str = "iter: {:05d}, precision: {:.5f}".format(i, temp_acc)
            config["out_file"].write(log_str+"\n")
            config["out_file"].flush()
            print(log_str)
        if i % config["snapshot_interval"] == 0:
            torch.save(nn.Sequential(base_network), osp.join(config["output_path"],
                "iter_{:05d}_model.pth.tar".format(i)))

        ## train one iter
        base_network.train(True)
        optimizer = lr_scheduler(optimizer, i, **schedule_param)
        optimizer.zero_grad()
        # Restart a loader's iterator whenever it has been fully consumed.
        if i % len_train_source == 0:
            iter_source = iter(dset_loaders["source"])
        if i % len_train_target == 0:
            iter_target = iter(dset_loaders["target"])
        # Built-in next(): iterators are only guaranteed to implement __next__.
        inputs_source, labels_source = next(iter_source)
        inputs_target, _ = next(iter_target)
        inputs_source, inputs_target, labels_source = inputs_source.cuda(), inputs_target.cuda(), labels_source.cuda()
        _, outputs_source = base_network(inputs_source)
        _, outputs_target = base_network(inputs_target)

        # Temperature-scaled target predictions.
        outputs_target_temp = outputs_target / config['temperature']
        target_softmax_out_temp = nn.Softmax(dim=1)(outputs_target_temp)
        # Per-sample weights from the (detached) prediction entropy, rescaled
        # so that they sum to train_bs.
        target_entropy_weight = loss.Entropy(target_softmax_out_temp).detach()
        target_entropy_weight = 1 + torch.exp(-target_entropy_weight)
        target_entropy_weight = train_bs * target_entropy_weight / torch.sum(target_entropy_weight)
        # Weighted class-by-class correlation of the target predictions.
        cov_matrix_t = target_softmax_out_temp.mul(target_entropy_weight.view(-1,1)).transpose(1,0).mm(target_softmax_out_temp)
        cov_matrix_t = cov_matrix_t / torch.sum(cov_matrix_t, dim=1)
        # Penalise everything off the diagonal.
        mcc_loss = (torch.sum(cov_matrix_t) - torch.trace(cov_matrix_t)) / class_num

        classifier_loss = nn.CrossEntropyLoss()(outputs_source, labels_source)
        total_loss = classifier_loss + mcc_loss
        total_loss.backward()
        optimizer.step()

        tensor_writer.add_scalar('total_loss', total_loss, i)
        tensor_writer.add_scalar('classifier_loss', classifier_loss, i)
        tensor_writer.add_scalar('cov_matrix_penalty', mcc_loss, i)

    tensor_writer.close()

    if best_model is None:
        # No evaluation ever beat the initial best_acc of 0.0; fall back to
        # the current network so the final save still writes a file.
        best_model = nn.Sequential(base_network)
    torch.save(best_model, osp.join(config["output_path"], "best_model.pth.tar"))
    return best_acc
Beispiel #26
0
def train_target(args, zz=''):
    """Adapt source-trained ``netF``/``netB`` to the target data using pseudo-labels.

    Loads the three source checkpoints (``source_F_``, ``source_B_`` and
    ``source_C_`` followed by ``zz``) from ``args.output_dir_src``, freezes
    ``netC``, and then for ``args.max_epoch`` epochs:

    1. recomputes pseudo-labels with ``obtain_label`` on the ``'test'`` loader;
    2. trains ``netF`` and ``netB`` on the ``'target'`` loader with a
       pseudo-label cross-entropy term and, if ``args.ent`` is set, an
       entropy term (minus a batch-mean entropy term when ``args.gent`` is set);
    3. evaluates with ``cal_acc`` and logs to ``args.out_file`` and stdout.

    Args:
        args: Namespace of hyper-parameters and paths. ``args.modelpath`` is
            overwritten while loading checkpoints.
        zz: Suffix used in the source checkpoint file names.

    Returns:
        Tuple ``(netF, netB, netC)`` after adaptation.

    Raises:
        ValueError: If ``args.net`` does not start with ``'res'``. Only that
            backbone family is constructed here; without this check ``netF``
            would be unbound and surface as a ``NameError`` further down.
    """
    # Fail fast, before any data loading or GPU allocation takes place.
    if args.net[0:3] != 'res':
        raise ValueError(
            "Unsupported backbone '{}': only 'res*' networks are handled.".format(args.net))

    dset_loaders = data_load(args)

    ## set base network
    netF = network.ResBase(res_name=args.net).cuda()
    netB = network.feat_bootleneck(type=args.classifier,
                                   feature_dim=netF.in_features,
                                   bottleneck_dim=args.bottleneck).cuda()
    netC = network.feat_classifier(type=args.layer,
                                   class_num=args.class_num,
                                   bottleneck_dim=args.bottleneck).cuda()

    # Restore the source-trained weights for each of the three parts.
    for part, module in (('F', netF), ('B', netB), ('C', netC)):
        args.modelpath = args.output_dir_src + '/source_' + part + '_' + str(zz) + '.pt'
        module.load_state_dict(torch.load(args.modelpath))

    # The classifier head stays fixed during adaptation.
    netC.eval()
    for _, weight in netC.named_parameters():
        weight.requires_grad = False

    # One parameter group per tensor. A non-positive decay factor freezes
    # that sub-network instead of training it.
    param_group = []
    for module, decay in ((netF, args.lr_decay1), (netB, args.lr_decay2)):
        for _, weight in module.named_parameters():
            if decay > 0:
                param_group.append({'params': weight, 'lr': args.lr * decay})
            else:
                weight.requires_grad = False
    optimizer = optim.SGD(param_group,
                          momentum=0.9,
                          weight_decay=5e-4,
                          nesterov=True)

    # Loop-invariant criterion, built once rather than on every batch.
    pseudo_label_criterion = loss.CrossEntropyLabelSmooth(
        num_classes=args.class_num, epsilon=0)

    for epoch in tqdm(range(args.max_epoch), leave=False):
        # Refresh pseudo-labels with the networks in eval mode.
        netF.eval()
        netB.eval()
        mem_label = obtain_label(dset_loaders['test'], netF, netB, netC, args)
        mem_label = torch.from_numpy(mem_label).cuda()
        netF.train()
        netB.train()

        for inputs_test, _, tar_idx in tqdm(dset_loaders['target'], leave=False):
            # Batches containing a single sample are skipped.
            if inputs_test.size(0) == 1:
                continue
            inputs_test = inputs_test.cuda()

            pred = mem_label[tar_idx]
            features_test = netB(netF(inputs_test))
            outputs_test = netC(features_test)

            classifier_loss = pseudo_label_criterion(outputs_test, pred)
            classifier_loss *= args.cls_par

            if args.ent:
                softmax_out = nn.Softmax(dim=1)(outputs_test)
                entropy_loss = torch.mean(loss.Entropy(softmax_out))
                if args.gent:
                    # Subtract the entropy of the batch-mean prediction.
                    msoftmax = softmax_out.mean(dim=0)
                    gentropy_loss = torch.sum(
                        -msoftmax * torch.log(msoftmax + args.epsilon))
                    entropy_loss -= gentropy_loss
                classifier_loss += entropy_loss * args.ent_par

            optimizer.zero_grad()
            classifier_loss.backward()
            optimizer.step()

        netF.eval()
        netB.eval()
        acc, _ = cal_acc(dset_loaders['test'], netF, netB, netC)
        # NOTE(review): acc is scaled by 100 here - confirm that cal_acc
        # returns a fraction rather than an already-scaled percentage.
        log_str = 'Task: {}, Iter:{}/{}; Accuracy = {:.2f}%'.format(
            args.name, epoch + 1, args.max_epoch, acc * 100)
        args.out_file.write(log_str + '\n')
        args.out_file.flush()
        print(log_str + '\n')

    if args.issave:
        for part, module in (('F', netF), ('B', netB), ('C', netC)):
            torch.save(
                module.state_dict(),
                osp.join(args.output_dir,
                         'target_' + part + '_' + args.savename + '.pt'))

    return netF, netB, netC
Beispiel #27
0
def train(config):
    """Train a domain-adversarial network together with feature-level cycle GANs.

    Builds source/target/test loaders from list files, the base network, two
    discriminators (``D_s``, ``D_t``), two feature generators (``G_s2t``,
    ``G_t2s``), an auxiliary classifier and an adversarial network. It then
    runs ``config["num_iterations"]`` steps. Each step updates, in order:
    the auxiliary classifier, the base/adversarial networks together with the
    generators, and finally each discriminator. The model is evaluated
    periodically and checkpoints are written to ``config["output_path"]``.

    NOTE(review): this function reads a module-level ``args`` object
    (``args.source``, ``args.target``, ``args.cla_plus_weight``,
    ``args.weight_in_lossG``, ``args.cyc_loss_weight``) that is not passed
    in - confirm it is defined by the calling script.

    Args:
        config: Nested mapping. Keys read here: ``"prep"``, ``"data"``,
            ``"network"``, ``"loss"``, ``"optimizer"``, ``"gpu"``,
            ``"method"``, ``"num_iterations"``, ``"test_interval"``,
            ``"snapshot_interval"``, ``"out_file"`` and ``"output_path"``.

    Returns:
        The highest accuracy returned by ``image_classification_test``.

    Raises:
        ValueError: If ``config['method']`` is not ``'CDAN+E'``, ``'CDAN'``
            or ``'DANN'``.
    """
    import copy  # function-scope import: only needed to snapshot the best model

    def _read_list(path):
        # Read an image-list file and close the handle straight away.
        with open(path) as list_file:
            return list_file.readlines()

    def _timestamp():
        # "month-day hour:minute:second", used in best-model file names.
        now = datetime.datetime.now()
        return f"{now.month}-{now.day} {now.hour}:{now.minute}:{now.second}"

    def _best_model_path(acc):
        return osp.join(config["output_path"],
                        "{}_to_{}_best_model_acc-{}_{}.pth.tar".format(
                            args.source, args.target, acc, _timestamp()))

    ## set pre-process
    prep_config = config["prep"]
    prep_dict = {
        "source": prep.image_train(**prep_config['params']),
        "target": prep.image_train(**prep_config['params']),
    }
    if prep_config["test_10crop"]:
        prep_dict["test"] = prep.image_test_10crop(**prep_config['params'])
    else:
        prep_dict["test"] = prep.image_test(**prep_config['params'])

    ## prepare data
    dsets = {}
    dset_loaders = {}
    data_config = config["data"]
    train_bs = data_config["source"]["batch_size"]
    test_bs = data_config["test"]["batch_size"]
    for domain in ("source", "target"):
        dsets[domain] = ImageList(_read_list(data_config[domain]["list_path"]),
                                  transform=prep_dict[domain])
        dset_loaders[domain] = DataLoader(dsets[domain], batch_size=train_bs,
                                          shuffle=True, num_workers=0, drop_last=True)

    test_lines = _read_list(data_config["test"]["list_path"])
    if prep_config["test_10crop"]:
        # One dataset/loader per crop transform. These lists are built once;
        # wrapping the comprehensions in another 10-iteration loop only
        # repeated the same work.
        dsets["test"] = [ImageList(list(test_lines), transform=prep_dict["test"][i])
                         for i in range(10)]
        dset_loaders["test"] = [DataLoader(dset, batch_size=test_bs,
                                           shuffle=False, num_workers=0)
                                for dset in dsets['test']]
    else:
        dsets["test"] = ImageList(test_lines, transform=prep_dict["test"])
        dset_loaders["test"] = DataLoader(dsets["test"], batch_size=test_bs,
                                          shuffle=False, num_workers=0)

    class_num = config["network"]["params"]["class_num"]

    ## set base network
    net_config = config["network"]
    base_network = net_config["name"](**net_config["params"])

    ## add discriminators D_s, D_t and generators G_s2t, G_t2s
    z_dimension = 256
    D_s = network.models["Discriminator"]()
    G_s2t = network.models["Generator"](z_dimension, 1024)

    D_t = network.models["Discriminator"]()
    G_t2s = network.models["Generator"](z_dimension, 1024)

    criterion_GAN = torch.nn.MSELoss()
    criterion_cycle = torch.nn.L1Loss()
    criterion_identity = torch.nn.L1Loss()
    criterion_Sem = torch.nn.L1Loss()

    optimizer_G = torch.optim.Adam(itertools.chain(G_s2t.parameters(), G_t2s.parameters()), lr=0.0003)
    optimizer_D_s = torch.optim.Adam(D_s.parameters(), lr=0.0003)
    optimizer_D_t = torch.optim.Adam(D_t.parameters(), lr=0.0003)

    fake_S_buffer = ReplayBuffer()
    fake_T_buffer = ReplayBuffer()

    ## add the auxiliary classifier
    classifier1 = net.Net(256, class_num)
    classifier1_optim = optim.Adam(classifier1.parameters(), lr=0.0003)

    ## add additional network for some methods
    loss_params = config["loss"]
    if loss_params["random"]:
        random_layer = network.RandomLayer([base_network.output_num(), class_num], loss_params["random_dim"])
        ad_net = network.AdversarialNetwork(loss_params["random_dim"], 1024)
    else:
        random_layer = None
        ad_net = network.AdversarialNetwork(base_network.output_num() * class_num, 1024)
    if loss_params["random"]:
        # NOTE(review): this is the only .cuda() call in this function; no
        # other module or batch is moved to the GPU here - confirm the
        # intended device placement.
        random_layer.cuda()
    parameter_list = base_network.get_parameters() + ad_net.get_parameters()

    ## set optimizer
    optimizer_config = config["optimizer"]
    optimizer = optimizer_config["type"](parameter_list,
                                         **(optimizer_config["optim_params"]))
    schedule_param = optimizer_config["lr_param"]
    lr_scheduler = lr_schedule.schedule_dict[optimizer_config["lr_type"]]

    gpus = config['gpu'].split(',')
    if len(gpus) > 1:
        ad_net = nn.DataParallel(ad_net, device_ids=[int(i) for i in gpus])
        base_network = nn.DataParallel(base_network, device_ids=[int(i) for i in gpus])

    # Loop-invariant: the four generator-loss weights (identity, GAN, cycle,
    # semantic) are parsed once instead of on every iteration.
    weights = [float(w) for w in args.weight_in_lossG.split(',')[:4]]

    ## train
    len_train_source = len(dset_loaders["source"])
    len_train_target = len(dset_loaders["target"])
    best_acc = 0.0
    best_model = None
    for i in range(config["num_iterations"]):
        if i % config["test_interval"] == config["test_interval"] - 1:
            base_network.train(False)
            temp_acc = image_classification_test(dset_loaders,
                                                 base_network, test_10crop=prep_config["test_10crop"])
            if temp_acc > best_acc:
                best_acc = temp_acc
                # nn.Sequential only wraps the live network. Deep-copy it so
                # that the model saved after training still holds the best
                # weights rather than the final ones.
                best_model = copy.deepcopy(nn.Sequential(base_network))
                torch.save(best_model, _best_model_path(best_acc))
            log_str = "iter: {:05d}, precision: {:.5f}".format(i, temp_acc)
            config["out_file"].write(log_str + "\n")
            config["out_file"].flush()

            print(log_str)
        if i % config["snapshot_interval"] == 0:
            torch.save(nn.Sequential(base_network), osp.join(config["output_path"],
                                                             "{}_to_{}_iter_{:05d}_model_{}.pth.tar".format(
                                                                 args.source, args.target, i,
                                                                 str(datetime.datetime.utcnow()))))
        print("it_train: {:05d} / {:05d} start".format(i, config["num_iterations"]))

        ## train one iter
        classifier1.train(True)
        base_network.train(True)
        ad_net.train(True)
        optimizer = lr_scheduler(optimizer, i, **schedule_param)
        optimizer.zero_grad()

        # Restart a loader's iterator whenever it has been fully consumed.
        if i % len_train_source == 0:
            iter_source = iter(dset_loaders["source"])
        if i % len_train_target == 0:
            iter_target = iter(dset_loaders["target"])
        # Built-in next(): iterators are only guaranteed to implement __next__.
        inputs_source, labels_source = next(iter_source)
        inputs_target, _ = next(iter_target)

        # extract features
        features_source, outputs_source = base_network(inputs_source)
        features_target, outputs_target = base_network(inputs_target)
        features = torch.cat((features_source, features_target), dim=0)
        outputs = torch.cat((outputs_source, outputs_target), dim=0)
        softmax_out = nn.Softmax(dim=1)(outputs)

        # Auxiliary classifier predictions on detached features.
        outputs_source1 = classifier1(features_source.detach())
        outputs_target1 = classifier1(features_target.detach())
        outputs1 = torch.cat((outputs_source1, outputs_target1), dim=0)
        softmax_out1 = nn.Softmax(dim=1)(outputs1)

        # Blend the two classifiers' predictions.
        softmax_out = (1-args.cla_plus_weight)*softmax_out + args.cla_plus_weight*softmax_out1

        if config['method'] == 'CDAN+E':
            entropy = loss.Entropy(softmax_out)
            transfer_loss = loss.CDAN([features, softmax_out], ad_net, entropy, network.calc_coeff(i), random_layer)
        elif config['method'] == 'CDAN':
            transfer_loss = loss.CDAN([features, softmax_out], ad_net, None, None, random_layer)
        elif config['method'] == 'DANN':
            transfer_loss = loss.DANN(features, ad_net)
        else:
            raise ValueError('Method cannot be recognized.')
        classifier_loss = nn.CrossEntropyLoss()(outputs_source, labels_source)

        # Cycle: constant real/fake targets for the discriminator updates.
        num_feature = features_source.size(0)
        real_label = torch.ones(num_feature)
        fake_label = torch.zeros(num_feature)

        # train the generators
        optimizer_G.zero_grad()

        # Identity loss
        same_t = G_s2t(features_target.detach())
        loss_identity_t = criterion_identity(same_t, features_target)

        same_s = G_t2s(features_source.detach())
        loss_identity_s = criterion_identity(same_s, features_source)

        # Gan loss
        # NOTE(review): the target here is the source class labels cast to
        # float rather than real_label - confirm this is intended.
        fake_t = G_s2t(features_source.detach())
        pred_fake = D_t(fake_t)
        loss_G_s2t = criterion_GAN(pred_fake, labels_source.float())

        fake_s = G_t2s(features_target.detach())
        pred_fake = D_s(fake_s)
        loss_G_t2s = criterion_GAN(pred_fake, labels_source.float())

        # cycle loss
        recovered_s = G_t2s(fake_t)
        loss_cycle_sts = criterion_cycle(recovered_s, features_source)

        recovered_t = G_s2t(fake_s)
        loss_cycle_tst = criterion_cycle(recovered_t, features_target)

        # sem loss
        pred_recovered_s = base_network.fc(recovered_s)
        pred_fake_t = base_network.fc(fake_t)
        loss_sem_t2s = criterion_Sem(pred_recovered_s, pred_fake_t)

        pred_recovered_t = base_network.fc(recovered_t)
        pred_fake_s = base_network.fc(fake_s)
        loss_sem_s2t = criterion_Sem(pred_recovered_t, pred_fake_s)

        loss_cycle = loss_cycle_tst + loss_cycle_sts
        loss_G = weights[0] * (loss_identity_s + loss_identity_t) + \
                 weights[1] * (loss_G_s2t + loss_G_t2s) + \
                 weights[2] * loss_cycle + \
                 weights[3] * (loss_sem_s2t + loss_sem_t2s)

        # train the softmax classifier on translated source features
        outputs_fake = classifier1(fake_t.detach())
        # classifier optimisation step
        classifier_loss1 = nn.CrossEntropyLoss()(outputs_fake, labels_source)
        classifier1_optim.zero_grad()
        classifier_loss1.backward()
        classifier1_optim.step()

        total_loss = loss_params["trade_off"] * transfer_loss + classifier_loss + args.cyc_loss_weight*loss_G
        total_loss.backward()
        optimizer.step()
        optimizer_G.step()

        ###### Discriminator S ######
        optimizer_D_s.zero_grad()

        # Real loss
        pred_real = D_s(features_source.detach())
        loss_D_real = criterion_GAN(pred_real, real_label)

        # Fake loss
        fake_s = fake_S_buffer.push_and_pop(fake_s)
        pred_fake = D_s(fake_s.detach())
        loss_D_fake = criterion_GAN(pred_fake, fake_label)

        # Total loss
        loss_D_s = loss_D_real + loss_D_fake
        loss_D_s.backward()

        optimizer_D_s.step()
        ###################################

        ###### Discriminator t ######
        optimizer_D_t.zero_grad()

        # Real loss
        pred_real = D_t(features_target.detach())
        loss_D_real = criterion_GAN(pred_real, real_label)

        # Fake loss
        fake_t = fake_T_buffer.push_and_pop(fake_t)
        pred_fake = D_t(fake_t.detach())
        loss_D_fake = criterion_GAN(pred_fake, fake_label)

        # Total loss
        loss_D_t = loss_D_real + loss_D_fake
        loss_D_t.backward()
        optimizer_D_t.step()
        print("it_train: {:05d} / {:05d} over".format(i, config["num_iterations"]))

    if best_model is None:
        # No evaluation ever beat the initial best_acc of 0.0; fall back to
        # the current network so the final save still writes a file.
        best_model = nn.Sequential(base_network)
    torch.save(best_model, _best_model_path(best_acc))
    return best_acc
Beispiel #28
0
def train(config):
    """Train a domain-adaptation network with a CDAN-style transfer loss.

    Builds ``ImageFolder`` datasets and loaders for the source and target
    folders, instantiates the base network and the adversarial network
    described by ``config``, and runs ``config["num_iterations"]``
    optimisation steps on CUDA. Every ``config["test_interval"]`` iterations
    the network is evaluated with ``image_classification_test``; when the
    accuracy improves, the whole model is saved under
    ``config["output_path"]``. Each evaluation appends one line to
    ``config["out_file"]``.

    Args:
        config: Nested dict. Keys read here: ``prep``, ``s_dset_path``,
            ``t_dset_path``, ``data``, ``network``, ``loss``, ``optimizer``,
            ``method``, ``num_iterations``, ``test_interval``,
            ``output_path`` and ``out_file``.

    Returns:
        The best accuracy seen during the periodic evaluations (0.0 when no
        evaluation improved on the initial value).

    Raises:
        ValueError: If ``config["method"]`` is not a recognised method name.
        NotImplementedError: If ``config["method"]`` is ``"DANN"``, which
            this loop has no transfer loss for.
    """
    ####################################################
    # Data setting
    ####################################################
    prep_params = config["prep"]["params"]
    prep_dict = {}  # data preprocessing transforms
    prep_dict["source"] = prep.image_train(**prep_params)
    prep_dict["target"] = prep.image_train(**prep_params)
    prep_dict["test"] = prep.image_test(**prep_params)

    # The test split is the target folder read with the test-time transform.
    dsets = {}
    dsets["source"] = datasets.ImageFolder(config["s_dset_path"],
                                           transform=prep_dict["source"])
    dsets["target"] = datasets.ImageFolder(config["t_dset_path"],
                                           transform=prep_dict["target"])
    dsets["test"] = datasets.ImageFolder(config["t_dset_path"],
                                         transform=prep_dict["test"])

    data_config = config["data"]
    # The original code used the source batch size for both training
    # loaders; here source and target each use their own setting.
    train_source_bs = data_config["source"]["batch_size"]
    train_target_bs = data_config["target"]["batch_size"]
    test_bs = data_config["test"]["batch_size"]

    # drop_last=True keeps every training batch at its full size, which the
    # fixed split of the concatenated batch below relies on.
    dset_loaders = {}
    dset_loaders["source"] = DataLoader(dsets["source"],
                                        batch_size=train_source_bs,
                                        shuffle=True,
                                        num_workers=4,
                                        drop_last=True)
    dset_loaders["target"] = DataLoader(dsets["target"],
                                        batch_size=train_target_bs,
                                        shuffle=True,
                                        num_workers=4,
                                        drop_last=True)
    dset_loaders["test"] = DataLoader(dsets["test"],
                                      batch_size=test_bs,
                                      shuffle=False,
                                      num_workers=4,
                                      drop_last=False)

    ####################################################
    # Network setting
    ####################################################
    # net_config["name"] is the network class (e.g. network.ResNetFc) and
    # net_config["params"] its constructor keyword arguments.
    net_config = config["network"]
    class_num = net_config["params"]["class_num"]
    base_network = net_config["name"](**net_config["params"]).cuda()

    loss_params = config["loss"]
    if loss_params["random"]:
        random_layer = network.RandomLayer(
            [base_network.output_num(), class_num],
            loss_params["random_dim"])
        random_layer.cuda()
        ad_net = network.AdversarialNetwork(loss_params["random_dim"], 1024)
    else:
        random_layer = None
        # NOTE(review): the input width is feature_dim * class_num; the
        # original author questioned why -- confirm against loss.CDAN.
        ad_net = network.AdversarialNetwork(
            base_network.output_num() * class_num, 1024)
    ad_net = ad_net.cuda()

    parameter_list = base_network.get_parameters() + ad_net.get_parameters()

    ####################################################
    # Optimizer setting
    ####################################################
    optimizer_config = config["optimizer"]
    optimizer = optimizer_config["type"](parameter_list,
                                         **(optimizer_config["optim_params"]))
    schedule_param = optimizer_config["lr_param"]
    # NOTE(review): the original author wondered whether the 'gamma' value
    # configured in lr_param (0.001) should be 0.01 -- confirm.
    lr_scheduler = lr_schedule.schedule_dict[optimizer_config["lr_type"]]

    ####################################################
    # Train
    ####################################################
    len_train_source = len(dset_loaders["source"])
    len_train_target = len(dset_loaders["target"])
    best_acc = 0.0
    method = config["method"]

    # One batch from each domain is consumed per iteration.
    for i in range(config["num_iterations"]):
        sys.stdout.write("Iteration : {} \r".format(i))
        sys.stdout.flush()

        base_network.train(True)
        ad_net.train(True)

        # The scheduler returns the optimizer it was given.
        optimizer = lr_scheduler(optimizer, i, **schedule_param)
        optimizer.zero_grad()

        # Restart a loader's iterator each time it has been fully consumed.
        if i % len_train_source == 0:
            iter_source = iter(dset_loaders["source"])
        if i % len_train_target == 0:
            iter_target = iter(dset_loaders["target"])

        # next(it) replaces the Python 2 style it.next(), which Python 3
        # iterators do not provide.
        inputs_source, labels_source = next(iter_source)
        inputs_target, _ = next(iter_target)

        inputs_source = inputs_source.cuda()
        labels_source = labels_source.cuda()
        inputs_target = inputs_target.cuda()

        inputs = torch.cat((inputs_source, inputs_target), dim=0)

        # Only the first two of the network's outputs are used in this loop.
        network_out = base_network(inputs)
        features, outputs = network_out[0], network_out[1]

        softmax_out = nn.Softmax(dim=1)(outputs)

        # Source samples come first in the concatenated batch.
        outputs_source = outputs[:train_source_bs]

        if method in ("CDAN+E", "CDAN_TransNorm"):
            entropy = loss.Entropy(softmax_out)
            transfer_loss = loss.CDAN([features, softmax_out], ad_net, entropy,
                                      network.calc_coeff(i), random_layer)
        elif method == "CDAN":
            transfer_loss = loss.CDAN([features, softmax_out], ad_net, None,
                                      None, random_layer)
        elif method == "DANN":
            # This branch used to be a bare `pass`, which left transfer_loss
            # unbound and crashed with a NameError on the next statement.
            raise NotImplementedError(
                "Method 'DANN' is not implemented in this training loop")
        else:
            raise ValueError('Method cannot be recognized')

        classifier_loss = nn.CrossEntropyLoss()(outputs_source, labels_source)
        total_loss = loss_params["trade_off"] * transfer_loss + classifier_loss

        total_loss.backward()
        optimizer.step()

        ####################################################
        # Test (on the last iteration of every test interval)
        ####################################################
        if i % config["test_interval"] == config["test_interval"] - 1:
            base_network.train(False)
            temp_acc = image_classification_test(dset_loaders, base_network)
            if temp_acc > best_acc:
                best_acc = temp_acc
                best_model = nn.Sequential(base_network)
                # NOTE(review): the file name carries the accuracy, not the
                # iteration, and round(x, 2) * 100 can give values such as
                # 56.99999999999999 for a Python float. Kept unchanged so
                # existing checkpoint names stay stable.
                ACC = round(best_acc, 2) * 100
                torch.save(
                    best_model,
                    os.path.join(config["output_path"],
                                 "iter_{}_model.pth.tar".format(ACC)))
            log_str = "iter: {:05d}, precision: {:.5f}".format(i, temp_acc)
            config["out_file"].write(log_str + "\n")
            config["out_file"].flush()
            print(log_str)

    return best_acc
Beispiel #29
0
def train(config):
    """Train a domain-adaptation network with a CDAN or DANN transfer loss.

    Builds the source and target training loaders from image-list files,
    instantiates the base and adversarial networks described by ``config``
    and runs ``config["num_iterations"]`` optimisation steps on CUDA. A
    snapshot of the network is saved every ``config["snapshot_interval"]``
    iterations, and the final network is saved as ``best_model.pth.tar``
    under ``config["output_path"]``.

    Periodic evaluation is disabled in this version of the loop, so no model
    is ever selected as "best" and the returned accuracy stays at 0.0.

    Args:
        config: Nested dict. Keys read here: ``prep``, ``data``, ``network``,
            ``loss``, ``optimizer``, ``gpu``, ``method``, ``num_iterations``,
            ``snapshot_interval`` and ``output_path``.

    Returns:
        ``best_acc``, which is always 0.0 while evaluation is disabled.

    Raises:
        ValueError: If ``config["method"]`` is not ``"CDAN+E"``, ``"CDAN"``
            or ``"DANN"``.
    """
    ## set pre-process
    prep_config = config["prep"]
    prep_dict = {}
    prep_dict["source"] = prep.image_train(**prep_config["params"])
    prep_dict["target"] = prep.image_train(**prep_config["params"])
    # The test transform is still built, but nothing uses it while the
    # evaluation step is disabled.
    if prep_config["test_10crop"]:
        prep_dict["test"] = prep.image_test_10crop(**prep_config["params"])
    else:
        prep_dict["test"] = prep.image_test(**prep_config["params"])

    ## prepare data
    data_config = config["data"]
    train_bs = data_config["source"]["batch_size"]
    dsets = {}
    dset_loaders = {}
    for domain in ("source", "target"):
        # Read the list through a context manager so the file handle is
        # closed instead of being left to the garbage collector.
        with open(data_config[domain]["list_path"]) as list_file:
            image_list = list_file.readlines()
        dsets[domain] = ImageList(image_list, transform=prep_dict[domain])
        dset_loaders[domain] = DataLoader(dsets[domain],
                                          batch_size=train_bs,
                                          shuffle=True,
                                          num_workers=4,
                                          drop_last=True)

    ## set base network
    net_config = config["network"]
    class_num = net_config["params"]["class_num"]
    base_network = net_config["name"](**net_config["params"])
    base_network = base_network.cuda()

    ## add additional network for some methods
    loss_params = config["loss"]
    if loss_params["random"]:
        random_layer = network.RandomLayer(
            [base_network.output_num(), class_num],
            loss_params["random_dim"])
        random_layer.cuda()
        ad_net = network.AdversarialNetwork(loss_params["random_dim"], 1024)
    else:
        random_layer = None
        ad_net = network.AdversarialNetwork(
            base_network.output_num() * class_num, 1024)
    ad_net = ad_net.cuda()
    # Parameters are collected before any DataParallel wrapping below.
    parameter_list = base_network.get_parameters() + ad_net.get_parameters()

    ## set optimizer
    optimizer_config = config["optimizer"]
    optimizer = optimizer_config["type"](parameter_list,
                                         **(optimizer_config["optim_params"]))
    schedule_param = optimizer_config["lr_param"]
    lr_scheduler = lr_schedule.schedule_dict[optimizer_config["lr_type"]]

    # config['gpu'] is a comma-separated list of device ids.
    gpus = config['gpu'].split(',')
    if len(gpus) > 1:
        device_ids = [int(gpu_id) for gpu_id in gpus]
        ad_net = nn.DataParallel(ad_net, device_ids=device_ids)
        base_network = nn.DataParallel(base_network, device_ids=device_ids)

    ## train
    len_train_source = len(dset_loaders["source"])
    len_train_target = len(dset_loaders["target"])
    best_acc = 0.0
    method = config['method']
    for i in range(config["num_iterations"]):
        if i % config["snapshot_interval"] == 0:
            torch.save(
                nn.Sequential(base_network),
                osp.join(config["output_path"],
                         "iter_{:05d}_model.pth.tar".format(i)))

        ## train one iter
        base_network.train(True)
        ad_net.train(True)
        optimizer = lr_scheduler(optimizer, i, **schedule_param)
        optimizer.zero_grad()

        # Restart a loader's iterator each time it has been fully consumed.
        if i % len_train_source == 0:
            iter_source = iter(dset_loaders["source"])
        if i % len_train_target == 0:
            iter_target = iter(dset_loaders["target"])

        # next(it) replaces the Python 2 style it.next(), which Python 3
        # iterators do not provide.
        inputs_source, labels_source = next(iter_source)
        inputs_target, _ = next(iter_target)
        inputs_source = inputs_source.cuda()
        inputs_target = inputs_target.cuda()
        labels_source = labels_source.cuda()

        features_source, outputs_source = base_network(inputs_source)
        features_target, outputs_target = base_network(inputs_target)
        features = torch.cat((features_source, features_target), dim=0)
        outputs = torch.cat((outputs_source, outputs_target), dim=0)
        softmax_out = nn.Softmax(dim=1)(outputs)

        if method == 'CDAN+E':
            entropy = loss.Entropy(softmax_out)
            transfer_loss = loss.CDAN([features, softmax_out], ad_net, entropy,
                                      network.calc_coeff(i), random_layer)
        elif method == 'CDAN':
            transfer_loss = loss.CDAN([features, softmax_out], ad_net, None,
                                      None, random_layer)
        elif method == 'DANN':
            transfer_loss = loss.DANN(features, ad_net)
        else:
            raise ValueError('Method cannot be recognized.')

        classifier_loss = nn.CrossEntropyLoss()(outputs_source, labels_source)
        if i % 10 == 0:
            print('iter: ', i, 'classifier_loss: ', classifier_loss.data,
                  'transfer_loss: ', transfer_loss.data)
        total_loss = loss_params["trade_off"] * transfer_loss + classifier_loss
        total_loss.backward()
        optimizer.step()

    # With evaluation disabled, best_model was never assigned and the save
    # below crashed with a NameError after the whole run. Save the final
    # network under the same file name instead.
    best_model = nn.Sequential(base_network)
    torch.save(best_model, osp.join(config["output_path"],
                                    "best_model.pth.tar"))
    return best_acc
def train_target(args):
    """Adapt source-trained digit networks on the target loader.

    Loads the feature extractor (netF), bottleneck (netB) and classifier
    (netC) weights saved as ``source_F.pt``, ``source_B.pt`` and
    ``source_C.pt`` in ``args.output_dir``. netC is frozen; netF and netB are
    then trained with SGD for ``args.max_epoch`` passes over
    ``dset_loaders["target"]``. The loss combines, when enabled:

    * cross-entropy against the labels returned by ``obtain_label``
      (refreshed once per pass), weighted by ``args.cls_par``;
    * the mean of ``loss.Entropy`` over the softmax outputs, weighted by
      ``args.ent_par`` (when ``args.ent`` is truthy).

    Accuracy on ``dset_loaders['test']`` is logged to ``args.out_file`` after
    every pass and at the last iteration.

    Args:
        args: Namespace of run options. Attributes read here: ``dset``,
            ``classifier``, ``layer``, ``bottleneck``, ``class_num``,
            ``output_dir``, ``lr``, ``max_epoch``, ``cls_par``, ``ent``,
            ``ent_par``, ``out_file``, ``issave`` and ``savename``.
            ``args.modelpath`` is overwritten while loading the weights.

    Returns:
        The tuple ``(netF, netB, netC)`` after adaptation.

    Raises:
        ValueError: If ``args.dset`` is not ``'u2m'``, ``'m2u'`` or ``'s2m'``.
    """
    dset_loaders = digit_load(args)

    ## set base network
    if args.dset in ('u2m', 'm2u'):
        netF = network.LeNetBase().cuda()
    elif args.dset == 's2m':
        netF = network.DTNBase().cuda()
    else:
        # An unknown task used to leave netF unbound and fail further down
        # with an unrelated NameError.
        raise ValueError('Unsupported dset: {}'.format(args.dset))

    netB = network.feat_bootleneck(type=args.classifier,
                                   feature_dim=netF.in_features,
                                   bottleneck_dim=args.bottleneck).cuda()
    netC = network.feat_classifier(type=args.layer,
                                   class_num=args.class_num,
                                   bottleneck_dim=args.bottleneck).cuda()

    # args.modelpath is reassigned for each file, as before, so it ends up
    # pointing at the classifier weights.
    args.modelpath = args.output_dir + '/source_F.pt'
    netF.load_state_dict(torch.load(args.modelpath))
    args.modelpath = args.output_dir + '/source_B.pt'
    netB.load_state_dict(torch.load(args.modelpath))
    args.modelpath = args.output_dir + '/source_C.pt'
    netC.load_state_dict(torch.load(args.modelpath))

    # The classifier stays fixed; only netF and netB are optimised.
    netC.eval()
    for _, param in netC.named_parameters():
        param.requires_grad = False

    param_group = []
    for trainable_net in (netF, netB):
        for _, param in trainable_net.named_parameters():
            param_group += [{'params': param, 'lr': args.lr}]

    optimizer = optim.SGD(param_group)
    optimizer = op_copy(optimizer)

    # One "interval" is one full pass over the target loader.
    interval_iter = len(dset_loaders["target"])
    max_iter = args.max_epoch * interval_iter
    iter_num = 0

    # Create the iterator up front rather than relying on a bare `except:`
    # to catch the NameError raised by its first use.
    iter_test = iter(dset_loaders["target"])

    while iter_num < max_iter:
        try:
            inputs_test, _, tar_idx = next(iter_test)
        except StopIteration:
            # The loader is exhausted: start the next pass.
            iter_test = iter(dset_loaders["target"])
            inputs_test, _, tar_idx = next(iter_test)

        # Single-sample batches are skipped without counting as an iteration.
        if inputs_test.size(0) == 1:
            continue

        # Refresh the labels used as cross-entropy targets once per pass.
        if iter_num % interval_iter == 0 and args.cls_par > 0:
            netF.eval()
            netB.eval()
            mem_label = obtain_label(dset_loaders['target_te'], netF, netB,
                                     netC, args)
            mem_label = torch.from_numpy(mem_label).cuda()
            netF.train()
            netB.train()

        iter_num += 1
        lr_scheduler(optimizer, iter_num=iter_num, max_iter=max_iter)

        inputs_test = inputs_test.cuda()
        features_test = netB(netF(inputs_test))
        outputs_test = netC(features_test)

        if args.cls_par > 0:
            # tar_idx is used to look up each sample's entry in mem_label.
            pred = mem_label[tar_idx]
            classifier_loss = args.cls_par * nn.CrossEntropyLoss()(
                outputs_test, pred)
        else:
            classifier_loss = torch.tensor(0.0).cuda()

        if args.ent:
            softmax_out = nn.Softmax(dim=1)(outputs_test)
            entropy_loss = torch.mean(loss.Entropy(softmax_out))
            im_loss = entropy_loss * args.ent_par
            classifier_loss += im_loss

        optimizer.zero_grad()
        classifier_loss.backward()
        optimizer.step()

        if iter_num % interval_iter == 0 or iter_num == max_iter:
            netF.eval()
            netB.eval()
            acc, _ = cal_acc(dset_loaders['test'], netF, netB, netC)
            log_str = 'Task: {}, Iter:{}/{}; Accuracy = {:.2f}%'.format(
                args.dset, iter_num, max_iter, acc)
            args.out_file.write(log_str + '\n')
            args.out_file.flush()
            print(log_str + '\n')
            netF.train()
            netB.train()

    if args.issave:
        torch.save(
            netF.state_dict(),
            osp.join(args.output_dir, "target_F_" + args.savename + ".pt"))
        torch.save(
            netB.state_dict(),
            osp.join(args.output_dir, "target_B_" + args.savename + ".pt"))
        torch.save(
            netC.state_dict(),
            osp.join(args.output_dir, "target_C_" + args.savename + ".pt"))

    return netF, netB, netC