def main(args):
    """Unsupervised domain adaptation with co-teaching.

    Two peer networks (``model`` and ``coModel``) are trained on pseudo
    labels obtained by DBSCAN clustering of re-ranked feature distances.
    Each outer iteration re-extracts features, re-clusters the target
    training set, and co-trains both networks on the resulting dataset.

    Returns:
        (mAP, rank-1) of the final evaluation on the target dataset.
    """
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    cudnn.benchmark = True

    # Create data loaders
    assert args.num_instances > 1, "num_instances should be greater than 1"
    assert args.batch_size % args.num_instances == 0, \
        'num_instances should divide batch_size'
    if args.height is None or args.width is None:
        # Inception takes a smaller input resolution than the default.
        args.height, args.width = (144, 56) if args.arch == 'inception' else \
            (256, 128)

    # get source data
    src_dataset, src_extfeat_loader = \
        get_source_data(args.src_dataset, args.data_dir, args.height,
                        args.width, args.batch_size, args.workers)
    # get target data
    tgt_dataset, num_classes, tgt_extfeat_loader, test_loader = \
        get_data(args.tgt_dataset, args.data_dir, args.height,
                 args.width, args.batch_size, args.workers)

    # Create model
    # Hacking here to let the classifier be the number of source ids
    # (the class counts below are the identity counts of each source set).
    if args.src_dataset == 'dukemtmc':
        model = models.create(args.arch, num_classes=632, pretrained=False)
        coModel = models.create(args.arch, num_classes=632, pretrained=False)
    elif args.src_dataset == 'market1501':
        model = models.create(args.arch, num_classes=676, pretrained=False)
        coModel = models.create(args.arch, num_classes=676, pretrained=False)
    elif args.src_dataset == 'msmt17':
        model = models.create(args.arch, num_classes=1041, pretrained=False)
        coModel = models.create(args.arch, num_classes=1041, pretrained=False)
    elif args.src_dataset == 'cuhk03':
        model = models.create(args.arch, num_classes=1230, pretrained=False)
        coModel = models.create(args.arch, num_classes=1230, pretrained=False)
    else:
        raise RuntimeError(
            'Please specify the number of classes (ids) of the network.')

    # Load from checkpoint; both peers start from the same weights.
    if args.resume:
        print(
            'Resuming checkpoints from finetuned model on another dataset...\n'
        )
        checkpoint = load_checkpoint(args.resume)
        model.load_state_dict(checkpoint['state_dict'], strict=False)
        coModel.load_state_dict(checkpoint['state_dict'], strict=False)
    else:
        # NOTE(review): raising a Warning subclass still aborts execution,
        # so a pretrained checkpoint is effectively mandatory here.
        raise RuntimeWarning('Not using a pre-trained model.')
    model = nn.DataParallel(model).cuda()
    coModel = nn.DataParallel(coModel).cuda()

    # Criterion: one triplet loss per peer network.
    criterion = [
        TripletLoss(args.margin,
                    args.num_instances,
                    isAvg=False,
                    use_semi=False).cuda(),
        TripletLoss(args.margin,
                    args.num_instances,
                    isAvg=False,
                    use_semi=False).cuda()
    ]
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    coOptimizer = torch.optim.Adam(coModel.parameters(), lr=args.lr)

    optims = [optimizer, coOptimizer]

    # training stage transformer on input images
    normalizer = T.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    train_transformer = T.Compose([
        T.Resize((args.height, args.width)),
        T.RandomHorizontalFlip(),
        T.ToTensor(), normalizer,
        T.RandomErasing(probability=0.5, sh=0.2, r1=0.3)
    ])

    # # Start training
    for iter_n in range(args.iteration):
        if args.lambda_value == 0:
            # lambda == 0 disables source features in the re-ranking step.
            source_features = 0
        else:
            # get source datas' feature
            source_features, _ = extract_features(model,
                                                  src_extfeat_loader,
                                                  print_freq=args.print_freq,
                                                  numStripe=None)
            # synchronization feature order with src_dataset.train
            source_features = torch.cat([
                source_features[f].unsqueeze(0)
                for f, _, _ in src_dataset.train
            ], 0)

            # extract training images' features
        print('Iteration {}: Extracting Target Dataset Features...'.format(
            iter_n + 1))
        target_features, _ = extract_features(model,
                                              tgt_extfeat_loader,
                                              print_freq=args.print_freq,
                                              numStripe=None)
        # synchronization feature order with dataset.train
        target_features = torch.cat([
            target_features[f].unsqueeze(0) for f, _, _ in tgt_dataset.trainval
        ], 0)
        # calculate distance and rerank result
        print('Calculating feature distances...')
        target_features = target_features.numpy()
        rerank_dist = re_ranking(source_features,
                                 target_features,
                                 lambda_value=args.lambda_value)
        if iter_n == 0:
            # DBSCAN cluster: eps is chosen as the mean of the smallest
            # rho-fraction of pairwise distances (estimated once, reused
            # for all later iterations).
            tri_mat = np.triu(rerank_dist, 1)  # tri_mat.dim=2
            tri_mat = tri_mat[np.nonzero(tri_mat)]  # tri_mat.dim=1
            tri_mat = np.sort(tri_mat, axis=None)
            top_num = np.round(args.rho * tri_mat.size).astype(int)
            eps = tri_mat[:top_num].mean()
            print('eps in cluster: {:.3f}'.format(eps))
            cluster = DBSCAN(eps=eps,
                             min_samples=4,
                             metric='precomputed',
                             n_jobs=8)
        # select & cluster images as training set of this epochs
        print('Clustering and labeling...')
        labels = cluster.fit_predict(rerank_dist)
        num_ids = len(set(labels)) - 1
        print('Iteration {} have {} training ids'.format(iter_n + 1, num_ids))
        # generate new dataset
        new_dataset = []
        # assign label for target ones (noise points relabelled by labelNoise)
        newLab = labelNoise(torch.from_numpy(target_features),
                            torch.from_numpy(labels))
        # unknownFeats = target_features[labels==-1,:]
        counter = 0
        from collections import defaultdict
        realIDs, fakeIDs = defaultdict(list), []
        for (fname, realID, cam), label in zip(tgt_dataset.trainval, newLab):
            # dont need to change codes in trainer.py _parsing_input function and sampler function after add 0
            new_dataset.append((fname, label, cam))
            realIDs[realID].append(counter)
            fakeIDs.append(label)
            counter += 1
        # Score the pseudo labels against the (unused-for-training) real IDs.
        precision, recall, fscore = calScores(realIDs, np.asarray(fakeIDs))
        print('Iteration {} have {} training images'.format(
            iter_n + 1, len(new_dataset)))
        print(
            f'precision:{precision * 100}, recall:{100 * recall}, fscore:{100 * fscore}'
        )
        train_loader = DataLoader(Preprocessor(new_dataset,
                                               root=tgt_dataset.images_dir,
                                               transform=train_transformer),
                                  batch_size=args.batch_size,
                                  num_workers=4,
                                  sampler=RandomIdentitySampler(
                                      new_dataset, args.num_instances),
                                  pin_memory=True,
                                  drop_last=True)
        trainer = CoTeaching(model, coModel, train_loader, criterion, optims)

        # Start training; remRate grows linearly so later iterations keep
        # a larger fraction of samples (up to 80% more than the base 0.2).
        for epoch in range(args.epochs):
            trainer.train(epoch,
                          remRate=0.2 + (0.8 / args.iteration) *
                          (1 + iter_n))  # to at most 80%
        # test only
        evaluator = Evaluator(model, print_freq=args.print_freq)
        rank_score = evaluator.evaluate(test_loader, tgt_dataset.query,
                                        tgt_dataset.gallery)

    # Evaluate
    # NOTE(review): `evaluator` and `rank_score` are defined inside the loop;
    # this raises NameError if args.iteration == 0 — confirm intended.
    rank_score = evaluator.evaluate(test_loader, tgt_dataset.query,
                                    tgt_dataset.gallery)
    save_checkpoint(
        {
            'state_dict': model.module.state_dict(),
            'epoch': epoch + 1,
            'best_top1': rank_score.market1501[0],
        },
        True,
        fpath=osp.join(args.logs_dir, 'adapted.pth.tar'))
    return rank_score.map, rank_score.market1501[0]
Example #2
0
 def get_feature(self, dataset):
     """Extract features for every sample in *dataset* as a numpy array."""
     loader = self.get_dataloader(dataset, training=False)
     feat_dict, _ = extract_features(self.model, loader)
     return np.array([t.numpy() for t in feat_dict.values()])
Example #3
0
 # Train the model with labeled + unlabeled data, then checkpoint it.
 trainer = Trainer(model, unlabeled_criterion=criterion)
 # One training pass per epoch.
 for epoch in range(epochs):
     # adjust_lr's return toggles whether unselected data is used this epoch.
     trainer.train(epoch,
                   labeled_loader,
                   unlabeled_loader,
                   optimizer,
                   use_unselect_data=adjust_lr(epoch, step_size=55))
 save_checkpoint({
     'step': step,
     'state_dict': model.state_dict()
 }, fpath, 'checkpoint_' + str(step) + '.pt')
 # Get the labeled__features and unlabeled_features.
 labeled__features, _ = extract_features(
     model, get_loader(labeled_data, dataset.images_dir, training=False))
 unlabeled_features, _ = extract_features(
     model, get_loader(unlabeled_data, dataset.images_dir, training=False))
 # Calculate the distance between labeled__features and unlabeled_features.
 # x: unlabeled features (m rows), y: labeled features (n rows), ordered
 # to match their respective data lists.
 x = torch.cat(
     [unlabeled_features[f].unsqueeze(0) for f, _, _, _ in unlabeled_data],
     0)
 y = torch.cat(
     [labeled__features[f].unsqueeze(0) for f, _, _, _ in labeled_data], 0)
 m, n = x.size(0), y.size(0)
 x = x.view(m, -1)
 y = y.view(n, -1)
 # Squared-Euclidean distance matrix: ||x||^2 + ||y||^2 - 2*x.y^T.
 dist = torch.pow(x, 2).sum(dim=1, keepdim=True).expand(m, n) + \
        torch.pow(y, 2).sum(dim=1, keepdim=True).expand(n, m).t()
 # NOTE(review): positional (beta, alpha) addmm_ is deprecated in modern
 # PyTorch; newer versions require addmm_(x, y.t(), beta=1, alpha=-2).
 dist.addmm_(1, -2, x, y.t())
 #
Example #4
0
def main(args):
    """Compute and save pairwise QAConv matching scores among query images.

    Loads a trained model and QAConv loss checkpoint, extracts features for
    the query set, computes query-vs-query similarity maps in batches on
    GPU, and saves the score tensors plus metadata to a MATLAB .mat file.
    """
    cudnn.benchmark = True

    num_classes = 4101  # in training set
    # Only probes [start_prob, end_prob) of the query set are scored.
    start_prob = 0
    end_prob = 1000

    # Create data loaders
    dataset, query_loader, gallery_loader = get_test_data(
        args.testset, args.data_dir, args.height, args.width,
        args.test_fea_batch)

    # Create model
    model = resmap.create(args.arch,
                          final_layer=args.final_layer,
                          neck=args.neck).cuda()
    num_features = model.num_features

    # Criterion: feature-map size depends on the backbone stage used.
    feamap_factor = {'layer2': 8, 'layer3': 16, 'layer4': 32}
    hei = args.height // feamap_factor[args.final_layer]
    wid = args.width // feamap_factor[args.final_layer]
    criterion = QAConvLoss(num_classes, num_features, hei, wid,
                           args.mem_batch_size).cuda()

    print('Loading checkpoint...')
    checkpoint = load_checkpoint(osp.join(args.exp_dir, 'checkpoint.pth.tar'))
    model.load_state_dict(checkpoint['model'])
    criterion.load_state_dict(checkpoint['criterion'])

    model = nn.DataParallel(model).cuda()

    # Final test
    print('Evaluate the learned model:')
    t0 = time.time()

    feature, _ = extract_features(model, query_loader)
    # Re-order features to match dataset.query ordering.
    feature = torch.cat(
        [feature[f].unsqueeze(0) for f, _, _, _ in dataset.query], 0)

    num_probs = end_prob - start_prob
    feature = feature[start_prob:end_prob]
    feature = feature.cuda()

    with torch.no_grad():
        # Pre-allocate result tensors: overall scores plus the per-location
        # score maps and argmax indices in both matching directions.
        score = torch.zeros(num_probs, num_probs)
        prob_score = torch.zeros(num_probs, num_probs, hei, wid)
        index_in_gal = torch.zeros(num_probs, num_probs, hei, wid)
        gal_score = torch.zeros(num_probs, num_probs, hei, wid)
        index_in_prob = torch.zeros(num_probs, num_probs, hei, wid)
        qaconv = torch.nn.DataParallel(QAConvMatch(
            feature, criterion).cuda()).cuda().eval()
        batch_size = args.test_prob_batch

        for i in range(0, num_probs, batch_size):
            end = min(num_probs, i + batch_size)
            s, ps, ig, gs, ip = qaconv(feature[i:end])
            score[i:end, :] = s
            prob_score[i:end, :, :, :] = ps
            index_in_gal[i:end, :, :, :] = ig
            gal_score[i:end, :, :, :] = gs
            index_in_prob[i:end, :, :, :] = ip
            if ((i + 1) // batch_size) % 100 == 0:
                print(
                    'Compute similarity: [{}/{}]. Min score: {}. Max score: {}. Avg score: {}.'
                    .format(i, num_probs, s.min(), s.max(), s.mean()))

    # BUG FIX: np.object was a deprecated alias for the builtin `object`
    # (removed in NumPy 1.24); use `object` directly for the string array.
    test_prob_list = np.array([fname for fname, _, _, _ in dataset.query],
                              dtype=object)
    test_prob_ids = [pid for _, pid, _, _ in dataset.query]
    test_prob_cams = [cam for _, _, cam, _ in dataset.query]
    test_prob_list = test_prob_list[start_prob:end_prob]
    test_prob_ids = test_prob_ids[start_prob:end_prob]
    test_prob_cams = test_prob_cams[start_prob:end_prob]
    test_score_file = osp.join(
        args.exp_dir,
        '%s_query_score_%d-%d.mat' % (args.testset, start_prob, end_prob))
    # fc weights of the QAConv classifier, reshaped to the feature-map grid.
    weight = criterion.fc.weight.view(2, hei, wid).detach().cpu()
    sio.savemat(test_score_file, {
        'fc': weight.numpy(),
        'score': score.numpy(),
        'prob_score': prob_score.numpy(),
        'index_in_gal': index_in_gal.numpy(),
        'gal_score': gal_score.numpy(),
        'index_in_prob': index_in_prob.numpy(),
        'prob_list': test_prob_list,
        'prob_ids': test_prob_ids,
        'prob_cams': test_prob_cams
    },
                oned_as='column',
                do_compression=True)

    test_time = time.time() - t0
    print("Total testing time: %.3f sec.\n" % test_time)
Example #5
0
 def get_features(self, dataset):
     """Return (stacked video features, list of image features) for *dataset*."""
     loader = self.get_dataloader(dataset, training=False)
     vid_feats, img_feats, _ = extract_features(self.model, loader)
     stacked_vid = torch.stack(list(vid_feats.values()))
     return stacked_vid, list(img_feats.values())
def main(args):
    """Clustering-based domain adaptation with optional HDBSCAN labeling.

    Starts from a source-pretrained model, evaluates direct transfer, then
    alternates between (a) clustering re-ranked target features to produce
    pseudo labels and (b) fine-tuning the model on them, checkpointing the
    best-mAP model each iteration.

    Returns:
        (mAP, rank-1) of the last iteration's evaluation.
    """
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    cudnn.benchmark = True

    # Create data loaders
    assert args.num_instances > 1, "num_instances should be greater than 1"
    assert args.batch_size % args.num_instances == 0, \
        'num_instances should divide batch_size'
    if args.height is None or args.width is None:
        # Inception takes a smaller input resolution than the default.
        args.height, args.width = (144, 56) if args.arch == 'inception' else \
                                  (256, 128)

    # get source data
    src_dataset, src_extfeat_loader = \
        get_source_data(args.src_dataset, args.data_dir, args.height,
                        args.width, args.batch_size, args.workers)
    # get target data
    tgt_dataset, num_classes, tgt_extfeat_loader, test_loader = \
        get_data(args.tgt_dataset, args.data_dir, args.height,
                 args.width, args.batch_size, args.workers)

    # Create model
    # Hacking here to let the classifier be the number of source ids
    if args.src_dataset == 'dukemtmc':
        model = models.create(args.arch, num_classes=632, pretrained=False)
    elif args.src_dataset == 'market1501':
        model = models.create(args.arch, num_classes=676, pretrained=False)
    else:
        raise RuntimeError(
            'Please specify the number of classes (ids) of the network.')

    # Load from checkpoint
    if args.resume:
        print(
            'Resuming checkpoints from finetuned model on another dataset...\n'
        )
        checkpoint = load_checkpoint(args.resume)
        model.load_state_dict(checkpoint['state_dict'], strict=False)
    else:
        # NOTE(review): raising a Warning subclass still aborts execution,
        # so a pretrained checkpoint is effectively mandatory here.
        raise RuntimeWarning('Not using a pre-trained model.')
    model = nn.DataParallel(model).cuda()

    # Distance metric
    # metric = DistanceMetric(algorithm=args.dist_metric)

    # Evaluator: measure direct-transfer performance as the initial best.
    evaluator = Evaluator(model, print_freq=args.print_freq)
    print(
        "Test with the original model trained on source domain (direct transfer):"
    )
    rank_score_best = evaluator.evaluate(test_loader, tgt_dataset.query,
                                         tgt_dataset.gallery)
    best_map = rank_score_best.map  #market1501[0]-->rank-1

    if args.evaluate:
        return

    # Criterion
    criterion = [
        TripletLoss(args.margin, args.num_instances).cuda(),
        TripletLoss(args.margin, args.num_instances).cuda(),
        AccumulatedLoss(args.margin, args.num_instances).cuda(),
    ]

    # Optimizer
    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=args.lr,
        momentum=0.9,
    )

    # training stage transformer on input images
    normalizer = T.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    train_transformer = T.Compose([
        T.Resize((args.height, args.width)),
        T.RandomHorizontalFlip(),
        T.ToTensor(), normalizer,
        T.RandomErasing(probability=0.5, sh=0.2, r1=0.3)
    ])

    # Start training
    for iter_n in range(args.iteration):
        if args.lambda_value == 0:
            source_features = 0  #this value controls the usage of source data
        else:
            # get source datas' feature
            source_features, _ = extract_features(model,
                                                  src_extfeat_loader,
                                                  print_freq=args.print_freq)
            # synchronization feature order with src_dataset.train
            source_features = torch.cat([
                source_features[f].unsqueeze(0)
                for f, _, _ in src_dataset.train
            ], 0)

        # extract training images' features
        print('Iteration {}: Extracting Target Dataset Features...'.format(
            iter_n + 1))
        target_features, _ = extract_features(model,
                                              tgt_extfeat_loader,
                                              print_freq=args.print_freq)
        # synchronization feature order with dataset.train
        target_features = torch.cat([
            target_features[f].unsqueeze(0) for f, _, _ in tgt_dataset.trainval
        ], 0)
        # calculate distance and rerank result
        print('Calculating feature distances...')
        target_features = target_features.numpy()
        rerank_dist = re_ranking(source_features,
                                 target_features,
                                 lambda_value=args.lambda_value)
        if iter_n == 0:
            # DBSCAN cluster: eps estimated once from the smallest
            # rho-fraction of pairwise distances, then reused.
            tri_mat = np.triu(rerank_dist, 1)  # tri_mat.dim=2
            tri_mat = tri_mat[np.nonzero(tri_mat)]  # tri_mat.dim=1
            tri_mat = np.sort(tri_mat, axis=None)
            top_num = np.round(args.rho * tri_mat.size).astype(int)
            eps = tri_mat[:top_num].mean()
            print('eps in cluster: {:.3f}'.format(eps))
            cluster = DBSCAN(eps=eps,
                             min_samples=4,
                             metric='precomputed',
                             n_jobs=8)

            # HDBSCAN cluster (third-party package, imported lazily so it
            # is only required when this code path actually runs).
            import hdbscan
            cluster_hdbscan = hdbscan.HDBSCAN(min_cluster_size=10,
                                              min_samples=4,
                                              metric='precomputed')

        # select & cluster images as training set of this epochs
        print('Clustering and labeling...')
        if args.use_hdbscan_clustering:
            print(
                'Use the better chlustering algorithm HDBSCAN for clustering')
            labels = cluster_hdbscan.fit_predict(rerank_dist)
        else:
            print('Use DBSCAN for clustering')
            labels = cluster.fit_predict(rerank_dist)
        num_ids = len(set(labels)) - 1
        print('Iteration {} have {} training ids'.format(iter_n + 1, num_ids))

        # generate new dataset; noise points (label == -1) are discarded.
        new_dataset = []
        for (fname, _, _), label in zip(tgt_dataset.trainval, labels):
            if label == -1:
                continue
            # dont need to change codes in trainer.py _parsing_input function and sampler function after add 0
            new_dataset.append((fname, label, 0))
        print('Iteration {} have {} training images'.format(
            iter_n + 1, len(new_dataset)))

        train_loader = DataLoader(Preprocessor(new_dataset,
                                               root=tgt_dataset.images_dir,
                                               transform=train_transformer),
                                  batch_size=args.batch_size,
                                  num_workers=4,
                                  sampler=RandomIdentitySampler(
                                      new_dataset, args.num_instances),
                                  pin_memory=True,
                                  drop_last=True)

        # train model with new generated dataset
        trainer = Trainer(model, criterion, print_freq=args.print_freq)
        evaluator = Evaluator(model, print_freq=args.print_freq)
        # Start training
        for epoch in range(args.epochs):
            trainer.train(epoch, train_loader, optimizer)

        # Evaluate
        rank_score = evaluator.evaluate(test_loader, tgt_dataset.query,
                                        tgt_dataset.gallery)

        #Save the best ckpt:
        rank1 = rank_score.market1501[0]
        mAP = rank_score.map
        is_best_mAP = mAP > best_map
        best_map = max(mAP, best_map)
        save_checkpoint(
            {
                'state_dict': model.module.state_dict(),
                'epoch': iter_n + 1,
                'best_mAP': best_map,
                # 'num_ids': num_ids,
            },
            is_best_mAP,
            fpath=osp.join(args.logs_dir, 'checkpoint.pth.tar'))

        print(
            '\n * Finished epoch {:3d}  top1: {:5.1%}  mAP: {:5.1%}  best_mAP: {:5.1%}{}\n'
            .format(iter_n + 1, rank1, mAP, best_map,
                    ' *' if is_best_mAP else ''))

    # NOTE(review): `rank_score` comes from the last loop iteration;
    # this raises NameError if args.iteration == 0 — confirm intended.
    return (rank_score.map, rank_score.market1501[0])
Example #7
0
        try:
            modelTest.load_state_dict(checkTgt)
        except:
            # Strict load failed; drop classifier weights (their shape is
            # dataset-dependent) and retry a non-strict load.
            allNames = list(checkTgt.keys())
            for name in allNames:
                if name.count('classifier') != 0:
                    del checkTgt[name]
            modelTest.load_state_dict(checkTgt, strict=False)

    # Both models are only used for inference below.
    model.eval()
    modelTest.eval()
    if torch.cuda.is_available():
        model = model.cuda()
        modelTest = modelTest.cuda()

    # Extract features for every image in the mixed dataset, ordered to
    # match the underlying dataset list.
    features, _ = extract_features(model, mix_loader, print_freq=10)
    features = torch.stack([features[f] for f, _, _ in mix_loader.dataset.dataset])

    # K-means (faiss, GPU) over the extracted features; centroids are kept
    # on GPU as float tensors.
    ncentroids = 512
    fDim = features.shape[1]
    cluster = faiss.Kmeans(fDim, ncentroids, niter=20, gpu=True)
    cluster.train(features.cpu().numpy())
    centroids = torch.from_numpy(cluster.centroids).cuda().float()

    evaluator = Evaluator(modelTest, args.print_freq)
    evaSrc = Evaluator(model, args.print_freq)

    # universal noise: a single perturbation image shared by all inputs.
    noise = torch.zeros((3, args.height, args.width)).cuda()
    normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
Example #8
0
def get_feature(model, data, data_dir, params):
    """Build a dataloader for *data* and return the extracted features."""
    loader = dp.get_dataloader(data, data_dir, **params)
    feats, _ = extract_features(model, loader)
    return feats
Example #9
0
def main(args):
    """Domain adaptation by KMeans pseudo-labeling.

    Each iteration extracts target features, clusters them with KMeans
    (k = number of distinct real IDs, which this variant peeks at), and
    fine-tunes the model on the resulting pseudo-labeled dataset.

    Returns:
        (mAP, rank-1) of the final evaluation on the target dataset.
    """
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    cudnn.benchmark = True

    # Create data loaders
    assert args.num_instances > 1, "num_instances should be greater than 1"
    assert args.batch_size % args.num_instances == 0, \
        'num_instances should divide batch_size'
    if args.height is None or args.width is None:
        # Inception takes a smaller input resolution than the default.
        args.height, args.width = (144, 56) if args.arch == 'inception' else \
                                  (256, 128)

    # get source data
    src_dataset, src_extfeat_loader = \
        get_source_data(args.src_dataset, args.data_dir, args.height,
                        args.width, args.batch_size, args.workers)
    # get target data
    tgt_dataset, num_classes, tgt_extfeat_loader, test_loader = \
        get_data(args.tgt_dataset, args.data_dir, args.height,
                 args.width, args.batch_size, args.workers)

    # Create model
    # Hacking here to let the classifier be the number of source ids
    if args.src_dataset == 'dukemtmc':
        model = models.create(args.arch, num_classes=632, pretrained=False)
    elif args.src_dataset == 'market1501':
        model = models.create(args.arch, num_classes=676, pretrained=False)
    else:
        raise RuntimeError(
            'Please specify the number of classes (ids) of the network.')

    # Load from checkpoint
    if args.resume:
        print(
            'Resuming checkpoints from finetuned model on another dataset...\n'
        )
        checkpoint = load_checkpoint(args.resume)
        model.load_state_dict(checkpoint['state_dict'], strict=False)
    else:
        # NOTE(review): raising a Warning subclass still aborts execution,
        # so a pretrained checkpoint is effectively mandatory here.
        raise RuntimeWarning('Not using a pre-trained model.')
    model = nn.DataParallel(model).cuda()

    # evaluator.evaluate(test_loader, tgt_dataset.query, tgt_dataset.gallery)
    # if args.evaluate: return

    # Criterion
    criterion = [
        TripletLoss(args.margin, args.num_instances, isAvg=True,
                    use_semi=True).cuda(),
        TripletLoss(args.margin, args.num_instances, isAvg=True,
                    use_semi=True).cuda(),
    ]

    # Optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # training stage transformer on input images
    normalizer = T.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    train_transformer = T.Compose([
        T.Resize((args.height, args.width)),
        T.RandomHorizontalFlip(),
        T.ToTensor(), normalizer,
        T.RandomErasing(probability=0.5, sh=0.2, r1=0.3)
    ])

    # # Start training
    for iter_n in range(args.iteration):
        if args.lambda_value == 0:
            source_features = 0
        else:
            # get source datas' feature
            source_features, _ = extract_features(model,
                                                  src_extfeat_loader,
                                                  print_freq=args.print_freq)
            # synchronization feature order with src_dataset.train
            source_features = torch.cat([
                source_features[f].unsqueeze(0)
                for f, _, _ in src_dataset.train
            ], 0)

        # extract training images' features
        print('Iteration {}: Extracting Target Dataset Features...'.format(
            iter_n + 1))
        target_features, tarNames = extract_features(
            model, tgt_extfeat_loader, print_freq=args.print_freq)
        # synchronization feature order with dataset.train
        target_features = torch.cat([
            target_features[f].unsqueeze(0) for f, _, _ in tgt_dataset.trainval
        ], 0)
        # NOTE(review): elements here look like 1-element tensors; counting
        # distinct values via set() then relies on tensor hashing — verify
        # numTarID really equals the number of distinct real IDs.
        target_real_label = np.asarray(
            [tarNames[f].unsqueeze(0) for f, _, _ in tgt_dataset.trainval])
        numTarID = len(set(target_real_label))
        # calculate distance and rerank result
        print('Calculating feature distances...')
        target_features = target_features.numpy()
        # NOTE(review): KMeans(n_jobs=...) was removed in scikit-learn 1.0;
        # this call fails on modern sklearn — confirm pinned version.
        cluster = KMeans(n_clusters=numTarID, n_jobs=8, n_init=1)

        # select & cluster images as training set of this epochs
        print('Clustering and labeling...')
        clusterRes = cluster.fit(target_features)
        labels, centers = clusterRes.labels_, clusterRes.cluster_centers_
        # labels = splitLowconfi(target_features,labels,centers)
        # num_ids = len(set(labels))
        # print('Iteration {} have {} training ids'.format(iter_n+1, num_ids))
        # generate new dataset
        new_dataset = []
        for (fname, _, cam), label in zip(tgt_dataset.trainval, labels):
            # if label==-1: continue
            # dont need to change codes in trainer.py _parsing_input function and sampler function after add 0
            new_dataset.append((fname, label, cam))
        print('Iteration {} have {} training images'.format(
            iter_n + 1, len(new_dataset)))
        train_loader = DataLoader(Preprocessor(new_dataset,
                                               root=tgt_dataset.images_dir,
                                               transform=train_transformer),
                                  batch_size=args.batch_size,
                                  num_workers=4,
                                  sampler=RandomIdentitySampler(
                                      new_dataset, args.num_instances),
                                  pin_memory=True,
                                  drop_last=True)

        # train model with new generated dataset
        trainer = Trainer(model, criterion)

        evaluator = Evaluator(model, print_freq=args.print_freq)

        # Start training
        for epoch in range(args.epochs):
            # trainer.train(epoch, remRate=0.2+(0.6/args.iteration)*(1+iter_n)) # to at most 80%
            trainer.train(epoch, train_loader, optimizer)
        # test only
        rank_score = evaluator.evaluate(test_loader, tgt_dataset.query,
                                        tgt_dataset.gallery)
        #print('co-model:\n')
        #rank_score = evaluatorB.evaluate(test_loader, tgt_dataset.query, tgt_dataset.gallery)

    # Evaluate
    # NOTE(review): `evaluator` is defined inside the loop; this raises
    # NameError if args.iteration == 0 — confirm intended.
    rank_score = evaluator.evaluate(test_loader, tgt_dataset.query,
                                    tgt_dataset.gallery)
    save_checkpoint(
        {
            'state_dict': model.module.state_dict(),
            'epoch': epoch + 1,
            'best_top1': rank_score.market1501[0],
        },
        True,
        fpath=osp.join(args.logs_dir, 'adapted.pth.tar'))
    return (rank_score.map, rank_score.market1501[0])
Example #10
0
def main(args):
    """Self-training domain adaptation for person re-identification.

    Per iteration: extract target-domain features (optionally fused with
    source-domain features via re-ranking), cluster them with HDBSCAN to
    obtain pseudo identity labels, build a pseudo-labelled training set and
    its cluster centers, then fine-tune the model with a classification loss
    over those centers.

    Returns:
        Tuple of (mAP, rank-1) scores of the adapted model on the target
        test set.
    """
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    cudnn.benchmark = True

    # Create data loaders
    assert args.num_instances > 1, "num_instances should be greater than 1"
    assert args.batch_size % args.num_instances == 0, \
        'num_instances should divide batch_size'
    if args.height is None or args.width is None:
        # Inception expects smaller inputs than the ResNet-style backbones.
        args.height, args.width = (144, 56) if args.arch == 'inception' else \
                                  (256, 128)

    # get source data
    src_dataset, src_extfeat_loader = \
        get_source_data(args.src_dataset, args.data_dir, args.height,
                        args.width, args.batch_size, args.workers)
    # get target data
    tgt_dataset, num_classes, tgt_extfeat_loader, test_loader = \
        get_data(args.tgt_dataset, args.data_dir, args.height,
                 args.width, args.batch_size, args.workers)

    # Create model
    # Hacking here to let the classifier be the number of source ids
    if args.src_dataset == 'dukemtmc':
        model = models.create(args.arch, num_classes=632, pretrained=False)
    elif args.src_dataset == 'market1501':
        model = models.create(args.arch, num_classes=676, pretrained=False)
    else:
        raise RuntimeError(
            'Please specify the number of classes (ids) of the network.')

    # Load from checkpoint. A pre-trained source model is mandatory: the
    # else-branch raises, so execution never continues without --resume.
    if args.resume:
        print(
            'Resuming checkpoints from finetuned model on another dataset...\n'
        )
        checkpoint = load_checkpoint(args.resume)
        model.load_state_dict(checkpoint['state_dict'], strict=False)
    else:
        raise RuntimeWarning('Not using a pre-trained model.')
    model = nn.DataParallel(model).cuda()

    # evaluator.evaluate(test_loader, tgt_dataset.query, tgt_dataset.gallery)
    # if args.evaluate: return

    # Criterion: two semi-hard triplet losses; slots 2/3 are filled later
    # (slot 3 receives the per-iteration ClassificationLoss below).
    criterion = [
        TripletLoss(args.margin, args.num_instances, isAvg=True,
                    use_semi=True).cuda(),
        TripletLoss(args.margin, args.num_instances, isAvg=True,
                    use_semi=True).cuda(), None, None
    ]

    # Optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # training stage transformer on input images
    normalizer = T.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    train_transformer = T.Compose([
        T.Resize((args.height, args.width)),
        T.RandomHorizontalFlip(),
        T.ToTensor(), normalizer,
        T.RandomErasing(probability=0.5, sh=0.2, r1=0.3)
    ])

    evaluator = Evaluator(model, print_freq=args.print_freq)
    # Baseline score of the source-trained model before adaptation.
    evaluator.evaluate(test_loader, tgt_dataset.query, tgt_dataset.gallery)

    # # Start training
    for iter_n in range(args.iteration):
        if args.lambda_value == 0:
            # lambda == 0 means target-only distances; skip source extraction.
            source_features = 0
        else:
            # get source datas' feature
            source_features, _ = extract_features(model,
                                                  src_extfeat_loader,
                                                  print_freq=args.print_freq)
            # synchronization feature order with src_dataset.train
            source_features = torch.cat([
                source_features[f].unsqueeze(0)
                for f, _, _, _ in src_dataset.train
            ], 0)

        # extract training images' features
        print('Iteration {}: Extracting Target Dataset Features...'.format(
            iter_n + 1))
        target_features, tarNames = extract_features(
            model, tgt_extfeat_loader, print_freq=args.print_freq)
        # synchronization feature order with dataset.train
        target_features = torch.cat([
            target_features[f].unsqueeze(0)
            for f, _, _, _ in tgt_dataset.trainval
        ], 0)

        # calculate distance and rerank result
        target_features = target_features.numpy()
        rerank_dist = re_ranking(source_features,
                                 target_features,
                                 lambda_value=args.lambda_value)

        cluster = HDBSCAN(metric='precomputed', min_samples=10)
        # select & cluster images as training set of this epochs
        clusterRes = cluster.fit(rerank_dist)
        # HDBSCAN labels are -1 (noise) or 0..label_num-1.
        labels, label_num = clusterRes.labels_, clusterRes.labels_.max() + 1
        centers = np.zeros((label_num, target_features.shape[1]))
        # BUG FIX: `nums` counts samples per *cluster*, so it must be sized by
        # the number of clusters (label_num), not the feature dimension. The
        # original `[0] * target_features.shape[1]` only worked when the
        # feature dimension happened to exceed the cluster count.
        nums = [0] * label_num
        print('clusters num =', label_num)

        # generate new dataset: keep only clustered (non-noise) samples and
        # accumulate per-cluster feature sums for the class centers.
        new_dataset = []
        index = -1
        for (fname, _, cam, timestamp), label in zip(tgt_dataset.trainval,
                                                     labels):
            index += 1
            if label == -1:
                continue
            # dont need to change codes in trainer.py _parsing_input function and sampler function after add 0
            new_dataset.append((fname, label, cam, timestamp))
            centers[label] += target_features[index]
            nums[label] += 1
        print('Iteration {} have {} training images'.format(
            iter_n + 1, len(new_dataset)))

        train_loader = DataLoader(Preprocessor(new_dataset,
                                               root=tgt_dataset.images_dir,
                                               transform=train_transformer),
                                  batch_size=args.batch_size,
                                  num_workers=4,
                                  sampler=RandomIdentitySampler(
                                      new_dataset, args.num_instances),
                                  pin_memory=True,
                                  drop_last=True)

        # Mean feature per cluster; every label in [0, label_num) has at
        # least one member by construction, so nums[i] >= 1 here.
        for i in range(label_num):
            centers[i] /= nums[i]
        criterion[3] = ClassificationLoss(normalize(centers, axis=1)).cuda()

        # Separate (higher) learning rate for the freshly-initialized
        # classifier built from the cluster centers.
        classOptimizer = torch.optim.Adam(
            [{
                'params': model.parameters()
            }, {
                'params': criterion[3].classifier.parameters(),
                'lr': 1e-3
            }],
            lr=args.lr)

        class_trainer = ClassificationTrainer(model, train_loader, criterion,
                                              classOptimizer)

        for epoch in range(args.epochs):
            class_trainer.train(epoch)

        rank_score = evaluator.evaluate(test_loader, tgt_dataset.query,
                                        tgt_dataset.gallery)

    # Evaluate the final adapted model and persist it.
    rank_score = evaluator.evaluate(test_loader, tgt_dataset.query,
                                    tgt_dataset.gallery)
    save_checkpoint(
        {
            'state_dict': model.module.state_dict(),
            'epoch': epoch + 1,
            'best_top1': rank_score.market1501[0],
        },
        True,
        fpath=osp.join(args.logs_dir, 'adapted.pth.tar'))
    return (rank_score.map, rank_score.market1501[0])
# 예제 #11 (Example #11)
# 0
# 파일: selftraining.py 프로젝트: zjc0212/SSG  (file: selftraining.py, project: zjc0212/SSG)
def main(args):
    """SSG-style self-training with (optionally split) part features.

    Per iteration: extract source/target features (each may be a list of
    per-part feature tensors when num_split > 1), compute euclidean and
    re-ranked distance matrices (or load them from a pickle cache), generate
    pseudo labels by clustering, build per-part training loaders, fine-tune,
    and checkpoint the best rank-1 model.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    cudnn.benchmark = True

    # Redirect print to both console and log file
    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.logs_dir, 'log.txt'))

    # Create data loaders
    assert args.num_instances > 1, "num_instances should be greater than 1"
    assert args.batch_size % args.num_instances == 0, \
        'num_instances should divide batch_size'
    if args.height is None or args.width is None:
        # Inception expects smaller inputs than the ResNet-style backbones.
        args.height, args.width = (144, 56) if args.arch == 'inception' else \
                                  (256, 128)

    ## get_source_data
    src_dataset, src_extfeat_loader = \
        get_source_data(args.src_dataset, args.data_dir, args.height,
                        args.width, args.batch_size, args.workers)
    # get_target_data
    tgt_dataset, num_classes, tgt_extfeat_loader, test_loader = \
        get_data(args.tgt_dataset, args.data_dir, args.height,
                 args.width, args.batch_size, args.workers)

    # Create model
    # Hacking here to let the classifier be the last feature embedding layer
    # Net structure: avgpool -> FC(2048) -> FC(args.features)
    if args.src_dataset == 'dukemtmc':
        model = models.create(args.arch,
                              num_classes=0,
                              num_split=args.num_split,
                              cluster=args.dce_loss)  #duke
    elif args.src_dataset == 'market1501':
        model = models.create(args.arch,
                              num_classes=0,
                              num_split=args.num_split,
                              cluster=args.dce_loss)
    else:
        raise RuntimeError(
            'Please specify the number of classes (ids) of the network.')

    # Load from checkpoint
    start_epoch = best_top1 = 0
    if args.resume:
        print(
            'Resuming checkpoints from finetuned model on another dataset...\n'
        )
        checkpoint = load_checkpoint(args.resume)
        # NOTE(review): the raw checkpoint dict is passed here, while other
        # variants in this file use checkpoint['state_dict'] — confirm the
        # saved checkpoint format matches.
        model.load_state_dict(checkpoint, strict=False)
    else:
        # Raising makes a pre-trained source model effectively mandatory.
        raise RuntimeWarning('Not using a pre-trained model')
    model = nn.DataParallel(model).cuda()

    # Distance metric
    metric = DistanceMetric(algorithm=args.dist_metric)

    # Evaluator
    evaluator = Evaluator(model, print_freq=args.print_freq)
    print("Test with the original model trained on source domain:")
    best_top1 = evaluator.evaluate(test_loader, tgt_dataset.query,
                                   tgt_dataset.gallery)
    if args.evaluate:
        return

    # Criterion: two triplet losses (presumably one per feature branch —
    # confirm against iter_trainer's usage).
    criterion = []
    criterion.append(
        TripletLoss(margin=args.margin,
                    num_instances=args.num_instances).cuda())
    criterion.append(
        TripletLoss(margin=args.margin,
                    num_instances=args.num_instances).cuda())

    #multi lr
    base_param_ids = set(map(id, model.module.base.parameters()))
    new_params = [p for p in model.parameters() if id(p) not in base_param_ids]
    param_groups = [{
        'params': model.module.base.parameters(),
        'lr_mult': 1.0
    }, {
        'params': new_params,
        'lr_mult': 1.0
    }]
    # Optimizer
    optimizer = torch.optim.SGD(param_groups,
                                lr=args.lr,
                                momentum=0.9,
                                weight_decay=args.weight_decay)

    ##### adjust lr
    # NOTE(review): adjust_lr is defined but never invoked in this function —
    # confirm whether iter_trainer (or a caller) is expected to apply it.
    def adjust_lr(epoch):
        # Step schedule: full lr for epochs <= 7, 0.3x through 14, 0.1x after.
        if epoch <= 7:
            lr = args.lr
        elif epoch <= 14:
            lr = 0.3 * args.lr
        else:
            lr = 0.1 * args.lr
        for g in optimizer.param_groups:
            g['lr'] = lr * g.get('lr_mult', 1)

    ##### training stage transformer on input images
    normalizer = T.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    train_transformer = T.Compose([
        Resize((args.height, args.width)),
        T.RandomHorizontalFlip(),
        T.ToTensor(), normalizer,
        T.RandomErasing(probability=0.5, sh=0.2, r1=0.3)
    ])

    # Start training
    iter_nums = args.iteration
    cluster_list = []
    top_percent = args.rho
    for iter_n in range(0, iter_nums):
        #### get source datas' feature
        # Optionally reuse a pickled distance cache on the first iteration
        # to skip the expensive feature extraction / re-ranking step.
        if args.load_dist and iter_n == 0:
            dist = pickle.load(
                open('dist' + str(args.num_split) + '.pkl', 'rb'))
            euclidean_dist_list = dist['euclidean']
            rerank_dist_list = dist['rerank']
        else:
            source_features, _ = extract_features(model,
                                                  src_extfeat_loader,
                                                  for_eval=False)
            # With num_split > 1 each entry is a list of per-part features;
            # stack each part separately in trainval order.
            if isinstance(source_features[src_dataset.trainval[0][0]], list):
                len_f = len(source_features[src_dataset.trainval[0][0]])
                source_features = [
                    torch.cat([
                        source_features[f][i].unsqueeze(0)
                        for f, _, _ in src_dataset.trainval
                    ], 0) for i in range(len_f)
                ]
            else:
                source_features = torch.cat([
                    source_features[f].unsqueeze(0)
                    for f, _, _ in src_dataset.trainval
                ], 0)  # synchronization feature order with s_dataset.trainval
            #### extract training images' features
            print('Iteration {}: Extracting Target Dataset Features...'.format(
                iter_n + 1))
            target_features, _ = extract_features(model,
                                                  tgt_extfeat_loader,
                                                  for_eval=False)
            if isinstance(target_features[tgt_dataset.trainval[0][0]], list):
                len_f = len(target_features[tgt_dataset.trainval[0][0]])
                target_features = [
                    torch.cat([
                        target_features[f][i].unsqueeze(0)
                        for f, _, _ in tgt_dataset.trainval
                    ], 0) for i in range(len_f)
                ]
            else:
                target_features = torch.cat([
                    target_features[f].unsqueeze(0)
                    for f, _, _ in tgt_dataset.trainval
                ], 0)  # synchronization feature order with dataset.trainval
            #### calculate distance and rerank result
            print('Calculating feature distances...')
            # target_features = target_features.numpy()
            euclidean_dist_list, rerank_dist_list = compute_dist(
                source_features,
                target_features,
                lambda_value=args.lambda_value,
                no_rerank=args.no_rerank,
                num_split=args.num_split)  # lambda=1 means only source dist
            # Free the large feature tensors before clustering/training.
            del target_features
            del source_features

        labels_list, cluster_list = generate_selflabel(euclidean_dist_list,
                                                       rerank_dist_list,
                                                       iter_n, args,
                                                       cluster_list)
        #### generate new dataset
        train_loader_list = generate_dataloader(tgt_dataset, labels_list,
                                                train_transformer, iter_n,
                                                args)
        del labels_list
        # del cluster_list
        top1 = iter_trainer(model, tgt_dataset, train_loader_list, test_loader,
                            optimizer, criterion, args.epochs, args.logs_dir,
                            args.print_freq)

        is_best = top1 > best_top1
        best_top1 = max(top1, best_top1)
        save_checkpoint(
            {
                'state_dict': model.module.state_dict(),
                'epoch': iter_n + 1,
                'best_top1': best_top1,
                # 'num_ids': num_ids,
            },
            is_best,
            fpath=osp.join(args.logs_dir, 'checkpoint.pth.tar'))

        print('\n * Finished epoch {:3d}  top1: {:5.1%}  best: {:5.1%}{}\n'.
              format(iter_n + 1, top1, best_top1, ' *' if is_best else ''))
# 예제 #12 (Example #12)
# 0
def main_worker(args):
    """Unsupervised re-ID training with an instance/cluster feature memory.

    Stages: (1) initialize the memory with extracted instance features and
    run `args.startE` epochs of exemplar pre-training where every image is
    its own pseudo class; (2) iterate: compute Jaccard distances over the
    memory features, cluster with DBSCAN, regenerate pseudo labels, train
    against the memory, and periodically evaluate / checkpoint the best
    model by mAP.

    Relies on the module-level globals ``start_epoch`` and ``best_mAP``.
    """
    global start_epoch, best_mAP
    start_time = time.monotonic()

    cudnn.benchmark = True

    sys.stdout = Logger(osp.join(args.logs_dir, 'log.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    # Create datasets
    iters = args.iters if (args.iters > 0) else None
    print("==> Load unlabeled dataset")
    dataset = get_data(args.dataset, args.data_dir)
    test_loader = get_test_loader(dataset, args.height, args.width,
                                  args.batch_size, args.workers)

    # Create model
    model = create_model(args)

    # Evaluator
    evaluator = Evaluator(model)
    if args.evaluate:
        evaluator.evaluate(test_loader,
                           dataset.query,
                           dataset.gallery,
                           cmc_flag=True)
        return

    # # for vis
    # marCamSet = get_data('marCam', args.data_dir)
    # mar_loader = get_plot_loader(marCamSet, args.height, args.width,
    #                              args.batch_size, args.workers, test_set=marCamSet.train)

    # Create feature memory: one 2048-d slot per training image.
    memory = nn.DataParallel(
        Memory(2048,
               len(dataset.train),
               temp=args.temp,
               momentum=args.momentum)).cuda()

    # Initialize target-domain instance features
    print("==> Initialize instance features in the feature memory")
    cluster_loader = get_test_loader(dataset,
                                     args.height,
                                     args.width,
                                     args.batch_size,
                                     args.workers,
                                     testset=sorted(dataset.train))
    features, _ = extract_features(model, cluster_loader, print_freq=50)
    # sorted(dataset.train) fixes the feature order so memory slot i always
    # corresponds to the i-th sorted training sample.
    features = torch.cat(
        [features[f].unsqueeze(0) for f, _, _ in sorted(dataset.train)], 0)
    memory.module.features = F.normalize(features, dim=1).cuda()

    del cluster_loader

    # optimizer for meta models
    params = [{
        "params": [value]
    } for value in model.module.params() if value.requires_grad]
    optimizer = torch.optim.Adam(params,
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=args.step_size,
                                                   gamma=0.1)

    # Trainer
    trainer = Trainer_USL(model, memory)
    cluster = DBSCAN(eps=args.eps,
                     min_samples=4,
                     metric='precomputed',
                     n_jobs=-1)
    # instance pre-training: every image starts as its own pseudo identity.
    pseudo_labeled_dataset = []
    pseudo_labels = torch.arange(len(dataset.train))
    for i, ((fname, _, cid),
            label) in enumerate(zip(sorted(dataset.train), pseudo_labels)):
        pseudo_labeled_dataset.append((fname, label.item(), cid))
    for epoch in range(args.startE):
        torch.cuda.empty_cache()
        memory.module.labels = pseudo_labels.cuda()
        # num_instances = -1 here: no identity-balanced sampling during the
        # exemplar stage (each "identity" has exactly one image).
        train_loader = get_train_loader(dataset.images_dir,
                                        args.height,
                                        args.width,
                                        args.batch_size,
                                        args.workers,
                                        -1,
                                        iters,
                                        trainset=pseudo_labeled_dataset)
        print(f'-----Exemplar Pretraining, Epoch{epoch}...------')
        trainer.train(epoch,
                      train_loader,
                      optimizer,
                      print_freq=args.print_freq,
                      train_iters=args.iters)
    # test pre-train
    evaluator.evaluate(test_loader,
                       dataset.query,
                       dataset.gallery,
                       cmc_flag=False)
    # start training
    for epoch in range(args.epochs):
        # Calculate distance
        torch.cuda.empty_cache()
        features = memory.module.features.clone()
        rerank_dist = compute_jaccard_distance(features,
                                               k1=args.k1,
                                               k2=args.k2)

        # select & cluster images as training set of this epochs
        pseudo_labels = cluster.fit_predict(rerank_dist)

        # generate new dataset and calculate cluster centers
        pseudo_labels = generate_pseudo_labels(pseudo_labels, features)
        pseudo_labeled_dataset = []
        for i, ((fname, _, cid),
                label) in enumerate(zip(sorted(dataset.train), pseudo_labels)):
            pseudo_labeled_dataset.append((fname, label.item(), cid))
        # statistics of clusters and un-clustered instances
        memory.module.labels = pseudo_labels.cuda()
        train_loader = get_train_loader(dataset.images_dir,
                                        args.height,
                                        args.width,
                                        args.batch_size,
                                        args.workers,
                                        args.num_instances,
                                        iters,
                                        trainset=pseudo_labeled_dataset)
        trainer.train(epoch,
                      train_loader,
                      optimizer,
                      print_freq=args.print_freq,
                      train_iters=args.iters,
                      symmetric=args.symmetric)
        if (epoch + 1) % args.eval_step == 0 or (epoch == args.epochs - 1):
            mAP = evaluator.evaluate(test_loader,
                                     dataset.query,
                                     dataset.gallery,
                                     cmc_flag=False)
            is_best = (mAP > best_mAP)
            best_mAP = max(mAP, best_mAP)
            save_checkpoint(
                {
                    'state_dict': model.state_dict(),
                    'epoch': epoch + 1,
                    'best_mAP': best_mAP,
                },
                is_best,
                fpath=osp.join(args.logs_dir, 'checkpoint.pth.tar'))
            # # for vis
            # mar_feature, _ = extract_features(model, mar_loader, print_freq=args.print_freq)
            # mar_feature = torch.stack([mar_feature[f] for f, _, _ in marCamSet.train], 0)
            # marPid, marCam = [pid for _, pid, _ in marCamSet.train], \
            #                  [cam for _, _, cam in marCamSet.train]
            # tsneCam = plotTSNE(mar_feature, marPid, marCam, f'{epoch}.jpg')
            # io.savemat(f'{epoch}.mat', {'tsneCam': tsneCam, 'marPid': marPid, 'marCam': marCam})

            print(
                '\n * Finished epoch {:3d}  model mAP: {:5.1%}  best: {:5.1%}{}\n'
                .format(epoch, mAP, best_mAP, ' *' if is_best else ''))

        lr_scheduler.step()
    print('==> Test with the best model:')
    checkpoint = load_checkpoint(osp.join(args.logs_dir, 'model_best.pth.tar'))
    model.load_state_dict(checkpoint['state_dict'])
    evaluator.evaluate(test_loader,
                       dataset.query,
                       dataset.gallery,
                       cmc_flag=True)
    end_time = time.monotonic()
    print('Total running time: ', timedelta(seconds=end_time - start_time))
# 예제 #13 (Example #13)
# 0
def main(args):
    """Spatio-temporal self-training entry point.

    Per iteration: cluster re-ranked visual distances with HDBSCAN to seed a
    spatio-temporal (camera/time) model, then mine positive/negative pairs
    per sample — mutual top-ranked neighbors, optionally admitted via the
    ST model's time-peak test — and fine-tune with a sorted triplet loss.

    Returns:
        Tuple of (mAP, rank-1) scores of the adapted model on the target
        test set.
    """
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    cudnn.benchmark = True

    # Create data loaders
    assert args.num_instances > 1, "num_instances should be greater than 1"
    assert args.batch_size % args.num_instances == 0, \
        'num_instances should divide batch_size'
    if args.height is None or args.width is None:
        # Inception expects smaller inputs than the ResNet-style backbones.
        args.height, args.width = (144, 56) if args.arch == 'inception' else \
                                  (256, 128)

    # get source data
    src_dataset, src_extfeat_loader = \
        get_source_data(args.src_dataset, args.data_dir, args.height,
                        args.width, args.batch_size, args.workers)
    # get target data
    tgt_dataset, num_classes, tgt_extfeat_loader, test_loader = \
        get_data(args.tgt_dataset, args.data_dir, args.height,
                 args.width, args.batch_size, args.workers)

    # Create model
    # Hacking here to let the classifier be the number of source ids
    if args.src_dataset == 'dukemtmc':
        model = models.create(args.arch, num_classes=632, pretrained=False)
    elif args.src_dataset == 'market1501':
        model = models.create(args.arch, num_classes=676, pretrained=False)
    else:
        raise RuntimeError(
            'Please specify the number of classes (ids) of the network.')

    # Load from checkpoint. A pre-trained source model is mandatory: the
    # else-branch raises, so execution never continues without --resume.
    if args.resume:
        print(
            'Resuming checkpoints from finetuned model on another dataset...\n'
        )
        checkpoint = load_checkpoint(args.resume)
        model.load_state_dict(checkpoint['state_dict'], strict=False)
    else:
        raise RuntimeWarning('Not using a pre-trained model.')
    model = nn.DataParallel(model).cuda()

    # evaluator.evaluate(test_loader, tgt_dataset.query, tgt_dataset.gallery)
    # if args.evaluate: return

    # Criterion
    criterion = [SortedTripletLoss(args.margin, isAvg=True).cuda()]

    # Optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # training stage transformer on input images
    normalizer = T.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    train_transformer = T.Compose([
        T.Resize((args.height, args.width)),
        T.RandomHorizontalFlip(),
        T.ToTensor(), normalizer,
        T.RandomErasing(probability=0.5, sh=0.2, r1=0.3)
    ])

    evaluator = Evaluator(model, print_freq=args.print_freq)
    # Baseline score of the source-trained model before adaptation.
    evaluator.evaluate(test_loader, tgt_dataset.query, tgt_dataset.gallery)

    st_model = ST_Model(tgt_dataset.meta['num_cameras'])

    # # Start training
    for iter_n in range(args.iteration):
        if args.lambda_value == 0:
            # lambda == 0 means target-only distances; skip source extraction.
            source_features = 0
        else:
            # get source datas' feature
            source_features, _ = extract_features(model,
                                                  src_extfeat_loader,
                                                  print_freq=args.print_freq)
            # synchronization feature order with src_dataset.train
            source_features = torch.cat([
                source_features[f].unsqueeze(0)
                for f, _, _, _ in src_dataset.train
            ], 0)

        # extract training images' features
        print('Iteration {}: Extracting Target Dataset Features...'.format(
            iter_n + 1))
        target_features, tarNames = extract_features(
            model, tgt_extfeat_loader, print_freq=args.print_freq)
        # synchronization feature order with dataset.train
        target_features = torch.cat([
            target_features[f].unsqueeze(0)
            for f, _, _, _ in tgt_dataset.trainval
        ], 0)

        # calculate distance and rerank result
        target_features = target_features.numpy()
        rerank_dist = re_ranking(source_features,
                                 target_features,
                                 lambda_value=args.lambda_value)

        # if iter_n > 0:
        #     rerank_dist = st_model.apply(rerank_dist, tgt_dataset.trainval, tgt_dataset.trainval)

        cluster = HDBSCAN(metric='precomputed', min_samples=10)
        # select & cluster images as training set of this epochs
        clusterRes = cluster.fit(rerank_dist)
        # HDBSCAN labels are -1 (noise) or 0..label_num-1.
        labels, label_num = clusterRes.labels_, clusterRes.labels_.max() + 1
        print('clusters num =', label_num)

        # generate new dataset from clustered (non-noise) samples only.
        new_dataset = []
        for (fname, _, cam, timestamp), label in zip(tgt_dataset.trainval,
                                                     labels):
            if label == -1:
                continue
            # dont need to change codes in trainer.py _parsing_input function and sampler function after add 0
            new_dataset.append((fname, label, cam, timestamp))
        print('Iteration {} have {} training images'.format(
            iter_n + 1, len(new_dataset)))

        # learn ST model from the pseudo-labelled subset
        same, _ = st_model.fit(new_dataset)

        # Renamed from `filter` to avoid shadowing the Python builtin.
        def same_peak(i, j):
            """True if samples i and j fall in the same camera/time peak."""
            _, _, c1, t1 = tgt_dataset.trainval[i]
            _, _, c2, t2 = tgt_dataset.trainval[j]
            return same.in_peak(c1, c2, t1, t2, 0.2)

        # Neighbor ranking per sample, excluding self (column 0).
        ranking = np.argsort(rerank_dist)[:, 1:]

        # Empirical average cluster size on the target set.
        # TODO(review): derive this from the clustering instead of hard-coding.
        cluster_size = 23.535612535612536
        must_conn = int(cluster_size / 2)
        might_conn = int(cluster_size * 2)

        # Pair mining: mutual close neighbors are positives; looser mutual
        # neighbors need the ST-model peak test; the rest become negatives.
        length = len(tgt_dataset.trainval)
        pos = [[] for _ in range(length)]
        neg = [[] for _ in range(length)]
        for i in range(length):
            for j_ in range(might_conn):
                j = ranking[i][j_]
                if j_ < must_conn and i in ranking[j][:must_conn]:
                    pos[i].append(j)
                elif i in ranking[j][:might_conn] and same_peak(i, j):
                    pos[i].append(j)
                else:
                    neg[i].append(j)
            # Top up negatives from the next-ranked neighbors so each sample
            # has at least as many negatives as positives.
            if len(neg[i]) < len(pos[i]):
                neg[i].extend(ranking[i][j_ + 1:j_ + 1 + len(pos[i]) -
                                         len(neg[i])])

        train_loader = DataLoader(Preprocessor(tgt_dataset.trainval,
                                               root=tgt_dataset.images_dir,
                                               transform=train_transformer),
                                  batch_size=args.batch_size,
                                  num_workers=4,
                                  sampler=TripletSampler(
                                      tgt_dataset.trainval, pos, neg),
                                  pin_memory=True,
                                  drop_last=True)

        trainer = Trainer(model, train_loader, criterion, optimizer)

        for epoch in range(args.epochs):
            trainer.train(epoch)

        rank_score = evaluator.evaluate(test_loader, tgt_dataset.query,
                                        tgt_dataset.gallery)

    # Evaluate the final adapted model and persist it.
    rank_score = evaluator.evaluate(test_loader, tgt_dataset.query,
                                    tgt_dataset.gallery)
    save_checkpoint(
        {
            'state_dict': model.module.state_dict(),
            'epoch': epoch + 1,
            'best_top1': rank_score.market1501[0],
        },
        True,
        fpath=osp.join(args.logs_dir, 'adapted.pth.tar'))
    return (rank_score.map, rank_score.market1501[0])