Example #1
def train(args):
    # set seed
    torch.manual_seed(2)
    torch.cuda.manual_seed_all(2)
    np.random.seed(2)

    # create model
    model = API_Net(args.device)
    model = model.to(args.device)
    model.conv = nn.DataParallel(model.conv)

    # define optimizer
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # resume the previous state of training if possible
    if args.resume:
        if os.path.isfile(args.resume):
            print('loading checkpoint {}'.format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            args.best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print('loaded checkpoint {} (epoch {})'.format(
                args.resume, checkpoint['epoch']))
            print('best acc:', args.best_prec1)
        else:
            print('no checkpoint found at {}'.format(args.resume))

    cudnn.benchmark = True

    # Load Data
    train_dataset = BatchDataset(args.KFold, args.nFold, transform=train_tfms)
    train_sampler = BalancedBatchSampler(train_dataset, args.n_classes,
                                         args.n_samples)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_sampler=train_sampler,
                                               num_workers=15)

    val_dataset = RandomDataset(args.KFold, args.nFold, transform=val_tfms)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=15)

    # Initialize scheduler (stepped below to fast-forward past completed epochs)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, args.epochs)
    for _ in range(args.start_epoch):
        scheduler.step()

    print("Let's use", torch.cuda.device_count(), "GPUs!")
    for epoch in range(args.start_epoch, args.epochs):
        train_one_epoch(train_loader, model, optimizer, epoch, val_loader,
                        args)
        scheduler.step()
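A note on the resume path above: the step() loop replays the schedule only because the checkpoint stores no scheduler state. A hedged alternative, assuming a new 'scheduler' key is added to the checkpoint (it is not part of the original format), is to persist and restore the scheduler directly:

# Save the scheduler alongside the existing keys (hypothetical 'scheduler' key)
torch.save({
    'epoch': epoch + 1,
    'best_prec1': best_prec1,
    'state_dict': model.state_dict(),
    'optimizer': optimizer.state_dict(),
    'scheduler': scheduler.state_dict(),
}, 'checkpoint.pth.tar')  # illustrative path

# When resuming, restore instead of replaying step()
checkpoint = torch.load('checkpoint.pth.tar')
scheduler.load_state_dict(checkpoint['scheduler'])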
Example #2
def train(train_path, labels, boxes, output_dir, num_epochs, hard, verbose):
    df_train, df_val = _get_toy_dataset(labels, boxes)

    if verbose:
        logging.info("Train size: {}, validation size: {}".format(
            len(df_train), len(df_val)))

    sampler = None
    if hard:
        sampler = BalancedBatchSampler(df_train, n_classes=4, n_samples=4)
    train_dl, single_train_dl, val_dl = get_dataloaders(
        df_train, df_val, train_path, sampler)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if verbose:
        logging.info("Using device {}".format(device))

    net = TripletNetwork(embedding_size=128).to(device)
    criterion = TripletLoss()
    selector = None
    if hard:
        selector = HardBatchTripletSelector()
    optimizer = optim.Adam(net.parameters(), lr=1e-4)

    net, history = _train(
        model=net,
        optimizer=optimizer,
        criterion=criterion,
        train_dataloader=train_dl,
        single_train_dataloader=single_train_dl,
        val_dataloader=val_dl,
        num_epochs=num_epochs,
        save_path=output_dir,
        device=device,
        selector=selector,
    )
    _plot_history(history)
Example #3
n_epochs = 20
log_interval = 100

fit(triplet_train_loader, triplet_test_loader, model, loss_fn, optimizer,
    scheduler, n_epochs, cuda, log_interval)

train_embeddings_tl, train_labels_tl = extract_embeddings(train_loader, model)
plot_embeddings(train_embeddings_tl, train_labels_tl)
val_embeddings_tl, val_labels_tl = extract_embeddings(test_loader, model)
plot_embeddings(val_embeddings_tl, val_labels_tl)

from datasets import BalancedBatchSampler

# We'll create mini-batches by sampling a set of labels for each batch and a fixed
# number of examples from each of those classes (a minimal sampler sketch follows this snippet)
train_batch_sampler = BalancedBatchSampler(train_dataset,
                                           n_classes=10,
                                           n_samples=25)
test_batch_sampler = BalancedBatchSampler(test_dataset,
                                          n_classes=10,
                                          n_samples=25)

kwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {}
online_train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_sampler=train_batch_sampler, **kwargs)
online_test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_sampler=test_batch_sampler, **kwargs)

# Set up the network and training parameters
from networks import EmbeddingNet
from losses import OnlineContrastiveLoss
from utils import AllPositivePairSelector, HardNegativePairSelector  # Strategies for selecting pairs within a minibatch
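The comment above describes the balanced sampling scheme every example on this page relies on. A minimal sketch with the same constructor signature (an illustration under that assumption, not the repo's implementation):

import numpy as np

class BalancedBatchSamplerSketch:
    """Yields batches of n_classes randomly chosen labels with n_samples
    examples per label; assumes each class has at least n_samples items."""

    def __init__(self, labels, n_classes, n_samples):
        self.labels = np.asarray(labels)
        self.classes = np.unique(self.labels)
        self.n_classes = n_classes
        self.n_samples = n_samples
        self.n_batches = len(self.labels) // (n_classes * n_samples)

    def __iter__(self):
        for _ in range(self.n_batches):
            chosen = np.random.choice(self.classes, self.n_classes, replace=False)
            batch = []
            for c in chosen:
                idx = np.where(self.labels == c)[0]
                batch.extend(np.random.choice(idx, self.n_samples, replace=False))
            yield batch  # a list of dataset indices, consumed via batch_sampler=

    def __len__(self):
        return self.n_batches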
Example #4
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)

    if config.mode == 'train':

        #torch.autograd.set_detect_anomaly(True)

        """ Load data """
        print('dataset path', dataset_path)
        train_dataset_path = dataset_path #+ '/train/train_data'

        img_dataset = train_data_loader(data_path=train_dataset_path, img_size=input_size,
                                        use_augment=use_augmentation)

        # Balanced batch sampler and online train loader
        train_batch_sampler = BalancedBatchSampler(img_dataset, n_classes=num_classes, n_samples=num_samples)
        #train_batch_sampler = NegativeClassMiningBatchSampler(img_dataset, n_classes=num_classes, n_samples=num_samples)
        online_train_loader = torch.utils.data.DataLoader(img_dataset,
                                                          batch_sampler=train_batch_sampler,
                                                          num_workers=4,
                                                          pin_memory=True)

        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

        # Gather the parameters to be optimized/updated.
        params_to_update = model.parameters()
        print("Params to learn:")
        if feature_extracting:
            params_to_update = []
            for name, param in model.named_parameters():
                if param.requires_grad:
                    params_to_update.append(param)  # collect only trainable params
                    print('\t', name)
Example #5
cfg_fname = 'configs/retriever_in_shop/triplet_hnm.py'  # Triplet network with hard negative mining
# cfg_fname = 'configs/retriever_in_shop/triplet_vanilla.py' # Triplet network

cfg = Config.fromfile(cfg_fname)
cuda = True

from datasets import BalancedBatchSampler
# Datasets
train_set = build_dataset(cfg.data.train)
query_set = build_dataset(cfg.data.query)
gallery_set = build_dataset(cfg.data.gallery)
print('datasets loaded')

# Mini batch selector
train_batch_sampler = BalancedBatchSampler(torch.tensor(train_set.train_labels),
                                           n_classes=8,
                                           n_samples=12)
test_batch_sampler = BalancedBatchSampler(torch.tensor(query_set.train_labels),
                                          n_classes=8,
                                          n_samples=12)

# Dataloaders
kwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {}
online_train_loader = torch.utils.data.DataLoader(
    train_set, batch_sampler=train_batch_sampler, **kwargs)
online_test_loader = torch.utils.data.DataLoader(
    query_set, batch_sampler=test_batch_sampler, **kwargs)
print('dataloaders built')

# Build model and load checkpoint
# model = build_retriever(cfg.model)
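As a quick sanity check on the loaders built above (hypothetical, and assuming each dataset item unpacks to an (image, label) pair), every balanced batch should hold exactly n_classes * n_samples items:

images, labels = next(iter(online_train_loader))
assert images.shape[0] == 8 * 12  # n_classes * n_samples = 96 per batch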
Example #6
model = embedding_net

if cuda:
    model.cuda()
loss_fn = OnlineTripletLoss(margin, SemihardNegativeTripletSelector(margin))
lr = 1e-3
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
#optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=0.0005)
scheduler = lr_scheduler.StepLR(optimizer, 8, gamma=0.1, last_epoch=-1)
n_epochs = 25
log_interval = 50

from datasets import BalancedBatchSampler

train_batch_sampler = BalancedBatchSampler(train_dataset.targets,
                                           n_classes=100,
                                           n_samples=5)
test_batch_sampler = BalancedBatchSampler(test_dataset.targets,
                                          n_classes=100,
                                          n_samples=5)

kwargs = {'num_workers': 0, 'pin_memory': True} if cuda else {}
online_train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_sampler=train_batch_sampler, **kwargs)
online_test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_sampler=test_batch_sampler, **kwargs)

fit(online_train_loader,
    online_test_loader,
    model,
    loss_fn,
    optimizer,
    scheduler,
    n_epochs,
    cuda,
    log_interval)
Example #7
test_dataset.test_labels = torch.Tensor(l_te)

fashion_mnist_classes = [
    '11_Starbucks', '12_Vans', '13_Burberry', '14_ALDO', '15_Polo',
    '16_ClubMonaco', '17_HatsOn', '18_Guess', '19_Victoria', '20_TheBodyShop',
    '21_Brooks', '22_Zara', '23_VanHart', '24_Starfield', '25_Lacoste',
    '26_Hollys', '27_Converse', '28_Fendi', '29_Chicor', '30_Custom',
    '31_Yankee', '32_Tommy', '33_GS', '34_KizDom', '35_Cartier', '36_Hermes',
    '37_HM', '38_Gucci', '39_AT', '40_Chanel'
]

mnist_classes = fashion_mnist_classes

train_batch_sampler = BalancedBatchSampler(train_dataset.train_labels,
                                           n_classes=30,
                                           n_samples=16)
test_batch_sampler = BalancedBatchSampler(test_dataset.test_labels,
                                          n_classes=30,
                                          n_samples=16)

kwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {}
online_train_loader = data.DataLoader(train_dataset,
                                      batch_sampler=train_batch_sampler,
                                      **kwargs)
online_test_loader = data.DataLoader(test_dataset,
                                     batch_sampler=test_batch_sampler,
                                     **kwargs)

margin = 1.
lr = 2e-4
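The margin set above feeds the online triplet loss used throughout these examples (e.g. OnlineTripletLoss in Example #6). A minimal sketch of the standard formulation max(0, d(a, p) - d(a, n) + margin), illustrative rather than the repo's class:

import torch.nn.functional as F

def triplet_loss(anchor, positive, negative, margin=1.0):
    # Pull anchor-positive together, push anchor-negative apart by at least margin
    d_pos = F.pairwise_distance(anchor, positive)
    d_neg = F.pairwise_distance(anchor, negative)
    return F.relu(d_pos - d_neg + margin).mean()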
Example #8
    cuda = torch.cuda.is_available()
    kwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {}
    if args.classify:
        train_dataset.classify_train_labels()
        test_dataset.classify_test_labels()
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.samples,
                                                   shuffle=True,
                                                   **kwargs)
        test_loader = torch.utils.data.DataLoader(test_dataset,
                                                  batch_size=args.samples,
                                                  shuffle=False,
                                                  **kwargs)
    else:
        train_batch_sampler = BalancedBatchSampler(train_dataset,
                                                   n_classes=args.classes,
                                                   n_samples=args.samples)
        test_batch_sampler = BalancedBatchSampler(test_dataset,
                                                  n_classes=args.classes,
                                                  n_samples=args.samples)
        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_sampler=train_batch_sampler, **kwargs)
        test_loader = torch.utils.data.DataLoader(
            test_dataset, batch_sampler=test_batch_sampler, **kwargs)

    if args.network == 'resnet':
        model = EmbeddingResNet(args.tuning)
    if args.network == 'vgg16':
        model = EmbeddingVgg16(args.tuning)
    if args.network == 'inception':
        model = EmbeddingInception(args.tuning)
Example #9
img_file = '/home/yixiong/exercise/In-shop_Clothes_Retrieval_Benchmark/Anno/query_img.txt'
id_file = '/home/yixiong/exercise/In-shop_Clothes_Retrieval_Benchmark/Anno/query_id.txt'
test_dataset = InShopDataset(img_path, img_file, id_file, train=False)

# Set up data loaders
kwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {}
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, **kwargs)
# gallery_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, **kwargs)

# We'll create mini-batches by sampling a set of labels for each batch and a
# fixed number of examples from each of those classes

if target_type == 'item_id':
    train_batch_sampler = BalancedBatchSampler(train_dataset.ids, n_classes=n_classes, n_samples=n_samples)
    test_batch_sampler = BalancedBatchSampler(test_dataset.ids, n_classes=n_classes, n_samples=n_samples)
else:
    train_batch_sampler = BalancedBatchSampler(train_dataset.catogories_id, n_classes=n_classes, n_samples=n_samples)
    test_batch_sampler = BalancedBatchSampler(test_dataset.catogories_id, n_classes=n_classes, n_samples=n_samples)

online_train_loader = torch.utils.data.DataLoader(train_dataset, batch_sampler=train_batch_sampler, **kwargs)
online_test_loader = torch.utils.data.DataLoader(test_dataset, batch_sampler=test_batch_sampler, **kwargs)

# Set up the network and training parameters
from networks import EmbeddingNet
from losses import OnlineTripletLoss
from utils import AllTripletSelector, HardestNegativeTripletSelector, RandomNegativeTripletSelector, SemihardNegativeTripletSelector  # Strategies for selecting triplets within a minibatch
from metrics import AverageNonzeroTripletsMetric

margin = 1.
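The selectors imported above differ only in which in-batch negative they pick for each anchor-positive pair. A minimal sketch of the hardest-negative case (my own illustration, not the repo's selector):

import torch

def hardest_negatives(embeddings, labels):
    # For each anchor, the index of its closest same-batch negative
    dists = torch.cdist(embeddings, embeddings)        # (B, B) pairwise L2
    same = labels.unsqueeze(0) == labels.unsqueeze(1)  # True for positives/self
    dists = dists.masked_fill(same, float('inf'))      # exclude non-negatives
    return dists.argmin(dim=1)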
Example #10
def main(args):
    if not os.path.exists('models'):
        os.makedirs('models')

    # img_list, base_path, item_dict = read_data(args.dataset, args.bbox)
    img_list, base_path, item_dict = read_data("DeepFashion2", bbox_gt=False)
    model_save_path = args.model_path  # 'models/siames_triplet_df2.pth'

    # writer = SummaryWriter('runs/fashion_mnist_experiment_1')
    model = ResNetbasedNet()
    if os.path.exists(model_save_path):
        model.load_state_dict(torch.load(model_save_path))

    cuda = torch.cuda.is_available()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if cuda:
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
        model.to(device)

    kwargs = {'num_workers': 8, 'pin_memory': True} if cuda else {}

    if not args.phase:
        train_dataset = DeepFashionDataset(img_list['train'], root=base_path)
        train_batch_sampler = BalancedBatchSampler(train_dataset.labels,
                                                   train_dataset.source,
                                                   n_classes=32,
                                                   n_samples=4)
        online_train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_sampler=train_batch_sampler, **kwargs)

        test_dataset = DeepFashionDataset(img_list['validation'],
                                          root=base_path)
        test_batch_sampler = BalancedBatchSampler(test_dataset.labels,
                                                  test_dataset.source,
                                                  n_classes=32,
                                                  n_samples=4)
        online_test_loader = torch.utils.data.DataLoader(
            test_dataset, batch_sampler=test_batch_sampler, **kwargs)

        margin = 1.
        loss_fn = OnlineTripletLoss(margin,
                                    RandomNegativeTripletSelector(margin))
        lr = 1e-3
        optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
        scheduler = lr_scheduler.StepLR(optimizer, 5, gamma=0.1, last_epoch=-1)
        n_epochs = 20
        log_interval = 200

        fit(online_train_loader,
            online_test_loader,
            model,
            loss_fn,
            optimizer,
            scheduler,
            n_epochs,
            cuda,
            log_interval,
            model_save_path,
            metrics=[AverageNonzeroTripletsMetric()])

    else:
        model.eval()
        gallery_dataset = DeepFashionDataset(img_list['validation'],
                                             root=base_path)

        acc, query_cnt = get_topK_acc(gallery_dataset, item_dict['validation'],
                                      model, cuda,
                                      open('retrieval_result.txt', 'a'), 100)
        np.savetxt('TopK_accuracy.txt',
                   np.concatenate((acc, np.asarray([query_cnt]))),
                   fmt='%1.5f',
                   delimiter=',')
Example #11
elif mining_tech == 'Triplet':
    train_dataset = TripletVGG10(dataset_tr)
    val_dataset = TripletVGG10(dataset_val)
    loss_fn = OnlineTripletLoss(margin, RandomNegativeTripletSelector(margin))
    if cuda:
        loss_fn = loss_fn.cuda()

elif mining_tech == 'Doppleganger':
    pass

else:
    raise Exception('Wrong choice for mining technique')

train_batch_sampler = BalancedBatchSampler(dataset_tr.train_labels,
                                           n_classes=27,
                                           n_samples=3)
val_batch_sampler = BalancedBatchSampler(dataset_val.val_labels,
                                         n_classes=27,
                                         n_samples=3)

train_loader = DataLoader(dataset_tr,
                          batch_sampler=train_batch_sampler,
                          **kwargs)
val_loader = DataLoader(dataset_val, batch_sampler=val_batch_sampler, **kwargs)

embedding_net = EmbeddingNet()
model = embedding_net
if cuda:
    model = nn.DataParallel(model).cuda()
Example #12
print(train_classes, test_classes)

# In[17]:

from losses import TripletLoss
from torch.optim import lr_scheduler
import torch.optim as optim
from trainer import fit
cuda = torch.cuda.is_available()

# In[18]:

from datasets import BalancedBatchSampler

train_batch_sampler = BalancedBatchSampler(train_dataset.train_labels,
                                           n_classes=train_classes,
                                           n_samples=train_classes_samples)
test_batch_sampler = BalancedBatchSampler(test_dataset.test_labels,
                                          n_classes=test_classes,
                                          n_samples=train_classes_samples)

train_batch_sampler_rgb = BalancedBatchSampler(train_dataset_rgb.train_labels,
                                               n_classes=train_classes,
                                               n_samples=train_classes_samples)
test_batch_sampler_rgb = BalancedBatchSampler(test_dataset_rgb.test_labels,
                                              n_classes=test_classes,
                                              n_samples=train_classes_samples)

kwargs = {'num_workers': 4, 'pin_memory': True} if cuda else {}

online_train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_sampler=train_batch_sampler, **kwargs)
Example #13
def main(args):
    model_path = args.model_path
    save_dir = args.save_dir
    vec_dim = 128

    data_type = ['validation'] if args.phase == 'test' else ['train', 'validation']
    img_list, base_path, item_dict = read_data("DeepFashion2",
                                               bbox_gt=True,
                                               type_list=data_type)

    # model = ResNetbasedNet(vec_dim=vec_dim, max_pool=True, load_path=model_path, clf2_num=2, adv_eta=1e-4)
    model = ResNetbasedNet(vec_dim=vec_dim,
                           max_pool=True,
                           load_path=model_path,
                           clf2_num=2)

    domain_adap = args.domain_adap
    adv_train = args.adv_train
    is_cud = torch.cuda.is_available()
    device = torch.device("cuda" if is_cud else "cpu")
    if is_cud:
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
        model.to(device)
    kwargs = {'num_workers': 8, 'pin_memory': True} if is_cud else {}

    if args.phase == 'train':
        train_dataset = DeepFashionDataset(img_list['train'],
                                           root=base_path,
                                           augment=True)
        train_batch_sampler = BalancedBatchSampler(train_dataset.labels,
                                                   train_dataset.source,
                                                   n_classes=64,
                                                   n_samples=4)
        online_train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_sampler=train_batch_sampler, **kwargs)

        test_dataset = DeepFashionDataset(img_list['validation'],
                                          root=base_path)
        test_batch_sampler = BalancedBatchSampler(test_dataset.labels,
                                                  test_dataset.source,
                                                  n_classes=64,
                                                  n_samples=4)
        online_test_loader = torch.utils.data.DataLoader(
            test_dataset, batch_sampler=test_batch_sampler, **kwargs)

        margin = 0.2
        loss_fn = OnlineTripletLoss(margin,
                                    HardestNegativeTripletSelector(margin),
                                    domain_adap)
        # loss_fn = AllTripletLoss(margin)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=1e-5, weight_decay=5e-4)
        # scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode="max", patience=4, threshold=0.001, cooldown=2, min_lr=1e-4 / (10 * 2),)
        scheduler = lr_scheduler.ReduceLROnPlateau(
            optimizer,
            mode="max",
            patience=4,
            threshold=1,
            cooldown=2,
            min_lr=1e-5 / (10 * 2),
        )
        n_epochs = 300
        log_interval = 200

        fit(online_train_loader,
            online_test_loader,
            model,
            loss_fn,
            optimizer,
            scheduler,
            n_epochs,
            is_cud,
            log_interval,
            save_dir,
            metrics=[AverageNonzeroTripletsMetric()],
            start_epoch=200,
            criterion=criterion,
            domain_adap=domain_adap,
            adv_train=adv_train)
        # fit(online_train_loader, online_test_loader, model, loss_fn, optimizer, scheduler, n_epochs, is_cud, log_interval,
        #     save_dir, metrics=[AverageNonzeroTripletsMetric()], start_epoch=0, criterion=criterion,
        #     adv_train=True, adv_epsilon=0.01, adv_alph=0.007, adv_iter=1)

    else:
        with torch.no_grad():
            model.eval()
            test_dataset = DeepFashionDataset(img_list['validation'],
                                              root=base_path)
            test_loader = torch.utils.data.DataLoader(test_dataset,
                                                      batch_size=256,
                                                      shuffle=False,
                                                      num_workers=4)
            embedding_mtx = torch.zeros((len(test_dataset), vec_dim))
            labels = np.zeros(len(test_dataset))
            top_k = 500
            idx_ = 0
            start_time = time.time()
            # Confusion counts: [pred_user/real_user, pred_user/real_shop,
            #                    pred_shop/real_user, pred_shop/real_shop]
            cf_mtx = np.zeros(4, dtype=float)

            for idx, (data, target, _, source) in enumerate(test_loader):
                emb_vecs = model(data.cuda())
                embedding_mtx[idx_:idx_ + len(data)] = emb_vecs[0]
                predict = torch.argmax(emb_vecs[1], dim=1).cpu().numpy()
                real = source.cpu().numpy()
                cf_mtx[0] += np.sum((predict == 0) & (real == 0))
                cf_mtx[1] += np.sum((predict == 0) & (real == 1))
                cf_mtx[2] += np.sum((predict == 1) & (real == 0))
                cf_mtx[3] += np.sum((predict == 1) & (real == 1))
                labels[idx_:idx_ + len(data)] = np.asarray(target)
                idx_ += len(data)
                if idx % 20 == 0:
                    print('processing {}/{}... elapsed time {}s'.format(
                        idx + 1, len(test_loader),
                        time.time() - start_time))

        print('Total: {}, Domain Classification Acc: {:.5f}'.format(
            np.sum(cf_mtx), (cf_mtx[0] + cf_mtx[3]) / np.sum(cf_mtx)))
        print('Recall User Photo: {:.5f}'.format(cf_mtx[0] /
                                                 (cf_mtx[0] + cf_mtx[2])))
        print('Recall Shop Photo: {:.5f}'.format(cf_mtx[3] /
                                                 (cf_mtx[1] + cf_mtx[3])))

        np.save(os.path.join(save_dir, 'emb_mtx.npy'), embedding_mtx)
        with open(os.path.join(save_dir, 'file_info.txt'), 'w') as f:
            for i in range(len(test_dataset)):
                f.write('{},{},{},{}\n'.format(img_list['validation'][i][0],
                                               test_dataset[i][1],
                                               test_dataset[i][2],
                                               test_dataset[i][3]))
        print('files saved!')

        # pdist is assumed to return a full (N, N) torch distance matrix here,
        # since the result is argsorted along dim=1
        distance_mtx = pdist(embedding_mtx)
        sorted_idx = torch.argsort(distance_mtx, dim=1).cpu().numpy()
        result_arr = np.zeros((sorted_idx.shape[0], top_k))
        for idx in range(sorted_idx.shape[0]):
            result_arr[idx] = sorted_idx[idx][sorted_idx[idx] != idx][:top_k]
            result_arr[idx] = labels[result_arr[idx].astype(int)] == labels[idx]
            if idx % 1000 == 0:
                print(idx)

        for k in [1, 5, 10, 20, 100, 200, 500]:
            topk_accuracy = np.sum(
                np.sum(result_arr[:, :k], axis=1) > 0) / result_arr.shape[0]
            print('Top-{} Accuracy: {:.5f}'.format(k, topk_accuracy))
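For clarity, the loop above counts a query as a hit at rank k when any of its k nearest neighbors shares the query's label; a toy check with a made-up match row:

import numpy as np

row = np.array([0., 1., 0., 0., 1.])  # 1 = neighbor label matches the query label
for k in (1, 5):
    print(k, np.sum(row[:k]) > 0)  # miss at k=1, hit at k=5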