Example #1
def data_loaders(model, loss_func, train_dataset, valid_dataset, test_dataset):
    data_transform = transforms.Compose([
        transforms.Resize(model.input_size[1:]),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor()
    ])

    grayscale = model.input_size[0] != 3

    if loss_func.__name__ != 'TripletLoss':
        train_dataset = SiameseNetworkDataset(
            imageFolderDataset=train_dataset,
            transform=data_transform,
            grayscale=grayscale)

        valid_dataset = SiameseNetworkDataset(
            imageFolderDataset=valid_dataset,
            transform=data_transform,
            grayscale=grayscale)
    else:
        train_dataset = TripletDataset(
            imageFolderDataset=train_dataset,
            transform=data_transform,
            grayscale=grayscale)

        valid_dataset = TripletDataset(
            imageFolderDataset=valid_dataset,
            transform=data_transform,
            grayscale=grayscale)

    test_dataset = SiameseNetworkDataset(
        imageFolderDataset=test_dataset,
        transform=data_transform,
        grayscale=grayscale)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=Config.train_batch_size,
        shuffle=True, num_workers=Config.num_workers)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=Config.valid_batch_size,
        shuffle=True, num_workers=Config.num_workers)
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=1,
        shuffle=True, num_workers=Config.num_workers)

    return train_loader, valid_loader, test_loader
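
A minimal call-site sketch for the helper above. The `ImageFolder` splits, the `model` instance, and the `TripletLoss` class are placeholders for whatever the surrounding project defines; `Config` is assumed to exist as in the function body.

# Hypothetical usage: the splits, model, and loss class are stand-ins
# for the project's own objects.
from torchvision.datasets import ImageFolder

train_loader, valid_loader, test_loader = data_loaders(
    model,                       # network exposing .input_size = (C, H, W)
    TripletLoss,                 # selects TripletDataset via loss_func.__name__
    ImageFolder('data/train'),
    ImageFolder('data/valid'),
    ImageFolder('data/test'))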
Example #2
def main():
    """
    Training.
    """
    global start_epoch, epoch, checkpoint

    # Initialize model or load checkpoint
    if checkpoint is None:
        model = UNet(in_channels, out_channels)
        # Initialize the optimizer
        optimizer = torch.optim.Adam(params=filter(lambda p: p.requires_grad,
                                                   model.parameters()),
                                     lr=lr)
    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']

    # Move to default device
    model = model.to(device)
    criterion = nn.L1Loss().to(device)

    # Custom dataloaders
    train_loader = torch.utils.data.DataLoader(TripletDataset(
        train_folder, crop_size, scale),
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=workers,
                                               pin_memory=True)
    test_loader = torch.utils.data.DataLoader(TripletDataset(
        test_folder, crop_size, scale),
                                              batch_size=batch_size,
                                              shuffle=True,
                                              num_workers=workers,
                                              pin_memory=True)

    # Total number of epochs to train for
    epochs = int(iterations // len(train_loader) + 1)

    # Epochs
    for epoch in range(start_epoch, epochs):
        # One epoch's training
        train(train_loader=train_loader,
              model=model,
              criterion=criterion,
              optimizer=optimizer,
              epoch=epoch,
              epochs=epochs)
        test(test_loader=test_loader, model=model, criterion=criterion)

        # Save checkpoint
        torch.save({
            'epoch': epoch,
            'model': model,
            'optimizer': optimizer
        }, f'checkpoints/checkpoint_unet_{epoch}.pth.tar')
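
The checkpoint above pickles the whole `model` and `optimizer` objects, which ties the file to the exact class definitions at save time. A sketch of the more portable state_dict convention (an alternative, not the original author's code):

# Sketch: save only state_dicts; restoring rebuilds the objects first.
torch.save({
    'epoch': epoch,
    'model_state': model.state_dict(),
    'optimizer_state': optimizer.state_dict()
}, f'checkpoints/checkpoint_unet_{epoch}.pth.tar')

# restoring:
model = UNet(in_channels, out_channels)
state = torch.load(f'checkpoints/checkpoint_unet_{epoch}.pth.tar')
model.load_state_dict(state['model_state'])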
Example #3
    def train(self, config):
        train_dataset = TripletDataset(config['dataset_path'], 'train',
                                       config['data_augmentation_suffixes'])
        train_dataset.prepare(config['num_train_pairs'])

        val_dataset = TripletDataset(config['dataset_path'], 'validation')
        val_dataset.prepare(config['num_val_pairs'])

        train_generator = TripletDataGenerator(
            train_dataset,
            batch_size=config['batch_size'],
            dim=self.config['input_shape'],
            shuffle=config['shuffle_training_inputs'])
        val_generator = TripletDataGenerator(
            val_dataset,
            batch_size=config['batch_size'],
            dim=self.config['input_shape'],
            shuffle=config['shuffle_training_inputs'])

        model_path, _ = os.path.split(self.config['model_filename'])
        callbacks = [
            keras.callbacks.TensorBoard(log_dir=self.log_dir,
                                        histogram_freq=0,
                                        write_graph=True,
                                        write_images=False),
            keras.callbacks.ModelCheckpoint(self.checkpoint_path,
                                            verbose=0,
                                            save_weights_only=True)
        ]

        self.keras_model.compile(loss=utils.l2_loss,
                                 optimizer=Adam(lr=config['learning_rate']))

        self.keras_model.fit_generator(generator=train_generator,
                                       validation_data=val_generator,
                                       epochs=config['epochs'],
                                       use_multiprocessing=True,
                                       callbacks=callbacks,
                                       workers=multiprocessing.cpu_count())

        self.keras_model.save(self.config['model_filename'])
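
`fit_generator` was deprecated and then removed in TensorFlow 2.x Keras. On those versions (before Keras 3) the equivalent call is `fit` itself, assuming the generators are `keras.utils.Sequence` instances; this is a sketch of the replacement, not part of the original:

# Sketch for TF 2.x: fit() accepts Sequence generators directly.
self.keras_model.fit(train_generator,
                     validation_data=val_generator,
                     epochs=config['epochs'],
                     use_multiprocessing=True,
                     workers=multiprocessing.cpu_count(),
                     callbacks=callbacks)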
Example #4
def __init__(self, path, transform, num_triplets, batchsize, resolution):
    self.path = path
    self.batchsize = batchsize
    self.num_workers = 4
    self.transform = transform
    self.resolution = resolution
    self.num_triplets = num_triplets
    self.dataset = TripletDataset(self.path, self.transform,
                                  num_triplets=self.num_triplets,
                                  resolution=self.resolution)
    self.dataloader = DataLoader(
        dataset=self.dataset,
        batch_size=self.batchsize,
        shuffle=False,
        num_workers=self.num_workers)
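
The method above belongs to a loader wrapper whose class name is not shown. A hypothetical instantiation, with `TripletLoader` and every argument value standing in for the real ones:

# Hypothetical: 'TripletLoader' is an assumed name for the unnamed class above,
# and my_transform is any torchvision transform pipeline.
loader = TripletLoader(path='data/faces',
                       transform=my_transform,
                       num_triplets=10000,
                       batchsize=32,
                       resolution=128)
for anchor, positive, negative in loader.dataloader:
    pass  # training step goes here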
Example #5
def get_triplet_dataloader(root=None, batch_size=1, transforms=None):
    dataset = TripletDataset(root=root, transforms=transforms)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    return dataloader
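
An illustrative call for the factory above; the root path and transform pipeline are assumptions, not values from the source:

# Hypothetical usage of get_triplet_dataloader.
from torchvision import transforms as T

loader = get_triplet_dataloader(
    root='data/triplets',
    batch_size=32,
    transforms=T.Compose([T.Resize((224, 224)), T.ToTensor()]))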
Example #6
def main(args):
    assert args.save_interval % 10 == 0, "save_interval must be a multiple of 10"

    # prepare dirs
    os.makedirs(args.log_dir, exist_ok=True)
    os.makedirs(args.save_model, exist_ok=True)
    
    device = torch.device("cuda:3" if torch.cuda.is_available() else "cpu")
    print("Device is", device)

    # img path loading
    with open("data/3d_data.pkl", mode='rb') as f:
        data_3d = pickle.load(f)
    train_path_list = data_3d.train_pl
    val_path_list = data_3d.val_pl

    train_dataset = TripletDataset(transform=ImageTransform(), flist=train_path_list)
    train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)

    val_dataset = TripletDataset(transform=ImageTransform(), flist=val_path_list)
    val_dataloader = DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False)

    model = TripletNet()
    model.to(device)
    
    criterion = nn.MarginRankingLoss(margin=args.margin)

    # choose params to train
    update_params_name = []
    for name, _ in model.named_parameters():
        if 'layer4' in name:
            update_params_name.append(name)
        elif 'fc' in name:
            update_params_name.append(name)

    print("**-----** update params **-----**")
    print(update_params_name)
    print("**-----------------------------**")
    print()

    params_to_update = choose_update_params(update_params_name, model)

    # set optimizer
    optimizer = optim.SGD(params_to_update, lr=1e-4, momentum=0.9)

    # run epochs
    log_writer = SummaryWriter(log_dir=args.log_dir)
    for epoch in range(args.num_epochs):
        print("-"*80)
        print('Epoch {}/{}'.format(epoch+1, args.num_epochs))

        epoch_loss, epoch_acc = [], []
        for inputs, labels in tqdm(train_dataloader):
            batch_loss, batch_acc = train_one_batch(inputs, labels, model, criterion, optimizer, device)
            epoch_loss.append(batch_loss.item())
            epoch_acc.append(batch_acc.item())
        
        epoch_loss = np.array(epoch_loss)
        epoch_acc = np.array(epoch_acc)
        print('[Loss: {:.4f}], [Acc: {:.4f}] \n'.format(np.mean(epoch_loss), np.mean(epoch_acc)))
        log_writer.add_scalar("train/loss", np.mean(epoch_loss), epoch+1)
        log_writer.add_scalar("train/acc", np.mean(epoch_acc), epoch+1)


        # validation
        if (epoch+1) % 10 == 0:
            print("Run Validation")
            epoch_loss, epoch_acc = [], []
            for inputs, labels in tqdm(val_dataloader):
                batch_loss, batch_acc = validation(inputs, labels, model, criterion, device)
                epoch_loss.append(batch_loss.item())
                epoch_acc.append(batch_acc.item())
            
            epoch_loss = np.array(epoch_loss)
            epoch_acc = np.array(epoch_acc)
            print('[Validation Loss: {:.4f}], [Validation Acc: {:.4f}]'.format(np.mean(epoch_loss), np.mean(epoch_acc)))
            log_writer.add_scalar("val/loss", np.mean(epoch_loss), epoch+1)
            log_writer.add_scalar("val/acc", np.mean(epoch_acc), epoch+1)

            # save model
            if (args.save_interval > 0) and ((epoch+1) % args.save_interval == 0):
                save_path = os.path.join(args.save_model, '{}_epoch_{:.1f}.pth'.format(epoch+1, np.mean(epoch_loss)))
                torch.save(model.state_dict(), save_path)

    log_writer.close()
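
`choose_update_params` is not shown in this snippet. A plausible implementation (an assumption, not the original helper) freezes every parameter outside the collected names and returns the trainable ones for the optimizer:

# Hypothetical implementation of the undefined helper used in main() above.
def choose_update_params(update_params_name, model):
    params_to_update = []
    for name, param in model.named_parameters():
        if name in update_params_name:
            param.requires_grad = True
            params_to_update.append(param)
        else:
            param.requires_grad = False
    return params_to_update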
Example #7
    name = 'arcface1.pt'
    load_local_model = False

    # os.environ['CUDA_LAUNCH_BLOCKING']='1'

    # device: cpu or cuda
    os.environ['CUDA_VISIBLE_DEVICES'] = '2'  # specify which GPU to use
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print("Device:", device)

    df_eval1 = pd.read_csv('../Data/eval_same.csv')
    df_eval2 = pd.read_csv('../Data/eval_diff.csv')
    df_test = pd.read_csv('../Data/test.csv')

    eval_dataset1 = TripletDataset(df_eval1, mode='eval')
    eval_dataset2 = TripletDataset(df_eval2, mode='eval')
    test_dataset = TripletDataset(df_test, mode='test')

    eval_loader1 = DataLoader(eval_dataset1,
                              batch_size=BATCH_SIZE,
                              num_workers=NUM_WORKERS,
                              drop_last=False)
    eval_loader2 = DataLoader(eval_dataset2,
                              batch_size=BATCH_SIZE,
                              num_workers=NUM_WORKERS,
                              drop_last=False)
    test_loader = DataLoader(test_dataset,
                             batch_size=BATCH_SIZE,
                             num_workers=NUM_WORKERS,
                             drop_last=False)
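
The fragment above relies on module-level constants that are not shown; hypothetical values:

# Assumed constants for the eval/test loaders above (values are illustrative).
BATCH_SIZE = 64
NUM_WORKERS = 4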
Example #8
def train_triplet_module():
    """
    :return:
    """
    # hyperparameters
    LR = 1e-5  # learning rate
    EPOCH = 15  # number of training epochs
    BATCH_SIZE = 10  # class batch size
    N_CLASS = 10  # number of classes
    num_sub_dataset = 10  # number of sub-datasets
    start_epoch = 0  # start from epoch 0 or last checkpoint epoch
    resume = False  # whether to resume from a checkpoint
    workers = 0  # number of workers for dataloader
    margin = 1e-1  # triplet loss margin hyperparameter
    k = 1  # k in top-k
    interval = 5  # epoch interval between diff loss and triplet loss updates
    balance = 4e-2  # weight balancing diff loss against triplet loss

    # load the data
    train_dataset = TripletDataset(num_sub_dataset)
    train_loader = DataLoader(train_dataset,
                              batch_size=BATCH_SIZE,
                              shuffle=True,
                              num_workers=workers)
    diff_dataset = DifferenceDataset()

    # define the models
    model_set = []
    for i in range(num_sub_dataset):
        model = TripletModule().float().to(device)
        model_set.append(model)

    # define the optimizer
    params_set = []
    for model in model_set:
        params = [{'params': model.parameters(), 'lr': LR}]
        params_set.extend(params)
    optimizer = optim.Adam(params_set, lr=LR)

    # resume training: load model weights from checkpoint
    if resume:
        print('==> Resuming from checkpoint..')
        assert os.path.isdir(
            'Checkpoint'), 'Error: no Checkpoint directory found!'
        state = torch.load('Checkpoint/models/ckpt.pth')
        for i in range(num_sub_dataset):
            model = model_set[i]
            model.load_state_dict(state['net'][i])
        optimizer.load_state_dict(state['optim'])
        start_epoch = state['epoch']

    # loss function
    cosloss = nn.CosineSimilarity(dim=1, eps=1e-6).to(device)

    # train the models
    for epoch in range(start_epoch, EPOCH):
        print(
            "####################################################################################"
        )
        # learning-rate scheduling
        adjust_learning_rate(optimizer, epoch)
        print('Learning rate is {}'.format(optimizer.param_groups[0]['lr']))
        ############################
        # training
        ############################
        # set training mode
        for i in range(num_sub_dataset):
            model = model_set[i]
            model.train()
        # iteration count
        cnt = 0
        # accumulated triplet loss
        sum_triplet_loss = 0.
        # accumulated diff loss
        sum_diff_loss = 0.
        # accumulated total loss
        sum_loss = 0.
        for data in train_loader:
            cnt += 1

            # load a batch from the Triplet dataset
            x, y = data
            batch_size = x.size(0)
            inputs, labels = torch.cat(tuple([x[:, i] for i in range(num_sub_dataset)]), dim=0),\
                                torch.cat(tuple([y[:, i] for i in range(num_sub_dataset)]), dim=0)
            inputs, labels = inputs.view(
                (-1, inputs.size(-1))), labels.view(-1)
            inputs, labels = inputs.float().to(device), labels.int().to(device)

            # zero the gradients
            optimizer.zero_grad()

            # forward and backward pass
            num_subset_sample = batch_size * N_CLASS  # samples per sub-dataset in this batch
            embeddings = torch.cat(tuple([
                model_set[i](inputs[num_subset_sample * i:num_subset_sample *
                                    (i + 1)]) for i in range(num_sub_dataset)
            ]),
                                   dim=0)

            triplet_loss = batch_hard_triplet_loss(k,
                                                   num_subset_sample,
                                                   labels,
                                                   embeddings,
                                                   margin=margin,
                                                   device=device)
            # triplet loss
            sum_triplet_loss += triplet_loss.item()

            # load a batch from the Difference dataset
            x, y = diff_dataset.getsamples(batch_size)
            inputs, labels = torch.from_numpy(x), torch.from_numpy(y)
            inputs, labels = inputs.float().to(device), labels.int().to(device)
            outputs = []
            outputs_sum = None
            for model in model_set:
                output = model(inputs)
                outputs.append(output)
                if outputs_sum is None:
                    outputs_sum = output
                else:
                    outputs_sum += output
            diff_loss = 0.
            for output in outputs:
                # diff_loss += torch.sum(torch.abs(cosloss(output, (outputs_sum-output)/(num_sub_dataset-1)))) / inputs.size(0)
                diff_loss += torch.sum(
                    cosloss(output, (outputs_sum - output) /
                            (num_sub_dataset - 1))) / inputs.size(0)
            diff_loss /= num_sub_dataset
            sum_diff_loss += diff_loss.item()

            loss = triplet_loss + balance * diff_loss
            sum_loss += loss.item()

            if (epoch + 1) % interval == 0:
                loss.backward()
            else:
                triplet_loss.backward()
            # gradient update
            optimizer.step()

            # print logs
            if cnt % 5 == 0 or cnt == len(train_loader):
                print(
                    '[%d/%d]--[%d/%d]\tTriplet Loss: %.6f\tDiff Loss: %.6f\tLoss: %.6f'
                    % (epoch + 1, EPOCH, cnt, len(train_loader),
                       sum_triplet_loss / cnt, sum_diff_loss / cnt,
                       sum_loss / cnt))

        # model states
        net_state_set = [model.state_dict() for model in model_set]
        # save checkpoint
        state = {
            'net': net_state_set,
            'optim': optimizer.state_dict(),
            'epoch': epoch
        }
        torch.save(state, './Checkpoint/models/ckpt.pth')
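
`batch_hard_triplet_loss` above is project-specific (note the extra `k` and `num_subset_sample` arguments). For reference, a standard batch-hard formulation over a single embedding batch looks roughly like this sketch, which is not the author's exact helper:

# Simplified batch-hard triplet loss: for each anchor, use its hardest
# positive and hardest negative within the batch (margin hinge, mean-reduced).
def batch_hard_triplet_loss_simple(labels, embeddings, margin):
    dist = torch.cdist(embeddings, embeddings, p=2)    # pairwise L2 distances
    same = labels.unsqueeze(0) == labels.unsqueeze(1)  # positive-pair mask
    hardest_pos = (dist * same.float()).max(dim=1).values
    hardest_neg = dist.masked_fill(same, float('inf')).min(dim=1).values
    return torch.relu(hardest_pos - hardest_neg + margin).mean()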
Example #9
def train_worker(dataset, device, rank=0, world_size=None):
    torch.cuda.set_device(device)
    criterion = TripletMarginRankingLoss(args.loss_margin)
    model = TransformerPool(args.vocab_size, args.embedding_dim,
                            args.hidden_dim, pre_trained=GLOVE)
    if args.re_train:
        model.load_state_dict(torch.load(
            args.train_model, map_location='cuda:{}'.format(device)))
    else:
        model.apply(init_weights)
    model, criterion = model.to(device), criterion.to(device)
    triplet_dataset = TripletDataset(dataset)

    in_distributed_mode = bool(world_size)
    if in_distributed_mode:
        rank, device = torch.distributed.get_rank(), torch.cuda.current_device()
        print("rank:{}, device:{}".format(rank, device))
        model = DistributedDataParallel(model, device_ids=[device])
        datasampler = DistributedSampler(triplet_dataset)
        dataloader = DataLoader(triplet_dataset, shuffle=False,
                                pin_memory=True, num_workers=0,
                                batch_size=args.batch_size, sampler=datasampler)
    else:
        dataloader = DataLoader(triplet_dataset, shuffle=True,
                                pin_memory=True, num_workers=4,
                                batch_size=args.batch_size)

    optimizer = RAdam(
        model.parameters(), lr=args.learning_rate)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=args.t_max, eta_min=args.eta_min)

    model.train()
    best_avg_loss = None
    t1 = time.time()
    for epoch in range(args.epoch):
        if in_distributed_mode:
            datasampler.set_epoch(epoch)
        total_loss = []
        bar = tqdm(desc='EPOCH {:02d}'.format(epoch), total=len(
            dataloader), leave=False) if rank == 0 else None

        for triplet in dataloader:
            optimizer.zero_grad()
            anchor, positive, negative = model(triplet)
            loss = criterion(anchor, positive, negative)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
            optimizer.step()
            total_loss.append(loss.item())
            if rank == 0:
                bar.update()

        if rank == 0:
            bar.close()
            epoch_avg_loss = np.mean(total_loss)
            print("Epoch {:02d}, Time {:.02f}s, AvgLoss {:.08f}, lr {:.8f}".format(
                epoch, time.time()-t1, epoch_avg_loss, optimizer.param_groups[0]['lr']))
            if best_avg_loss is None or epoch_avg_loss < best_avg_loss:
                best_avg_loss = epoch_avg_loss
                state_dict = model.module.state_dict() if in_distributed_mode else model.state_dict()
                torch.save(state_dict, args.model_path)
            t1 = time.time()
        scheduler.step()  # CosineAnnealingLR advances once per epoch; its step() takes no metric
        torch.cuda.empty_cache()
    return
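
`train_worker` assumes the process group is already initialized when `world_size` is set. A hypothetical single-node launcher; the backend, init address, and `my_dataset` are assumptions, not part of the original:

# Hypothetical launcher: one worker process per visible GPU, NCCL backend.
import torch
import torch.distributed
import torch.multiprocessing as mp

def _launch(rank, world_size, dataset):
    torch.distributed.init_process_group(
        backend='nccl', init_method='tcp://127.0.0.1:29500',
        rank=rank, world_size=world_size)
    train_worker(dataset, device=rank, rank=rank, world_size=world_size)

if __name__ == '__main__':
    world_size = torch.cuda.device_count()
    mp.spawn(_launch, args=(world_size, my_dataset), nprocs=world_size)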