Example #1
def prepare_data(maskDFPath):
    maskDF = pd.read_pickle(maskDFPath)
    train, validate = train_test_split(maskDF, test_size=0.15, random_state=RANDOMSTATE,
                                        stratify=maskDF['mask'])
    trainDF = MaskDataset(train)
    validateDF = MaskDataset(validate)
    return trainDF, validateDF
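Note: MaskDataset itself is not shown in any of these examples. As a rough, hypothetical sketch of what a Dataset wrapping such a DataFrame could look like (the 'path' and 'mask' column names are assumptions, not taken from the source):

# Hypothetical sketch only; the real MaskDataset is defined elsewhere.
# The 'path' and 'mask' column names are assumptions for illustration.
from PIL import Image
from torch.utils.data import Dataset

class DataFrameMaskDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        image = Image.open(row['path']).convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        return image, int(row['mask'])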
Example #2
def get_data_loaders(train_files, val_files, img_size=224):
    train_transform = Compose([
        ColorJitter(0.3, 0.3, 0.3, 0.3),
        RandomResizedCrop(img_size, scale=(0.8, 1.2)),
        RandomAffine(10.),
        RandomRotation(13.),
        RandomHorizontalFlip(),
        ToTensor(),
    ])
    # train_mask_transform = Compose([
    #     RandomResizedCrop(img_size, scale=(0.8, 1.2)),
    #     RandomAffine(10.),
    #     RandomRotation(13.),
    #     RandomHorizontalFlip(),
    #     ToTensor(),
    # ])
    val_transform = Compose([
        Resize((img_size, img_size)),
        ToTensor(),
    ])

    train_loader = DataLoader(MaskDataset(train_files, train_transform),
                              batch_size=BATCH_SIZE,
                              shuffle=True,
                              pin_memory=True,
                              num_workers=4)
    val_loader = DataLoader(MaskDataset(val_files, val_transform),
                            batch_size=BATCH_SIZE,
                            shuffle=False,
                            pin_memory=True,
                            num_workers=4)

    return train_loader, val_loader
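The commented-out train_mask_transform block suggests a paired image/mask (segmentation-style) variant. If it were revived, the same random crop/flip would have to be applied identically to the image and its mask; one common pattern, sketched here under the assumption that the transforms draw from Python's and torch's RNGs, is to replay the same seed before each call:

# Hypothetical helper, not part of the original code: apply one transform
# identically to an image and its mask by replaying the same RNG state.
import random
import torch

def paired_transform(image, mask, transform):
    seed = random.randint(0, 2**31 - 1)
    random.seed(seed)
    torch.manual_seed(seed)
    image = transform(image)
    random.seed(seed)
    torch.manual_seed(seed)
    mask = transform(mask)
    return image, mask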
Example #3
def grid_image(np_images, gts, preds, n=9, shuffle=False):
    batch_size = np_images.shape[0]
    assert n <= batch_size
    choices = random.choices(range(batch_size), k=n) if shuffle else list(range(n))
    figure = plt.figure(figsize=(12, 18))  # caution: hardcoded; figsize may need adjusting for the image size
    plt.subplots_adjust(top=0.9)           # caution: hardcoded; top may need adjusting for the image size
    n_grid = int(np.ceil(n ** 0.5))        # plt.subplot expects integer grid dimensions
    tasks = ["mask", "gender", "age"]
    for idx, choice in enumerate(choices):
        gt = gts[choice].item()
        pred = preds[choice].item()
        image = np_images[choice]
        # title = f"gt: {gt}, pred: {pred}"
        gt_decoded_labels = MaskDataset.decode_multi_class(gt)
        pred_decoded_labels = MaskDataset.decode_multi_class(pred)
        title = "\n".join([
            f"{task} - gt: {gt_label}, pred: {pred_label}"
            for gt_label, pred_label, task
            in zip(gt_decoded_labels, pred_decoded_labels, tasks)
        ])

        plt.subplot(n_grid, n_grid, idx + 1, title=title)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        plt.imshow(image, cmap=plt.cm.binary)

    return figure
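decode_multi_class is referenced but not defined here. Example #8 builds its loss with 18 classes, which matches a 3 (mask) x 2 (gender) x 3 (age) layout; under that assumption, the decoder could look like the following sketch (the class ordering is a guess, not confirmed by the source):

# Hypothetical sketch: assumes 18 combined labels ordered as
# mask (3) x gender (2) x age (3); the real decode_multi_class may differ.
def decode_multi_class(multi_class_label):
    mask_label = multi_class_label // 6          # 0..2
    gender_label = (multi_class_label % 6) // 3  # 0..1
    age_label = multi_class_label % 3            # 0..2
    return mask_label, gender_label, age_label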
Example #4
 def prepare_data(self) -> None:
     self.maskDF = maskDF = pd.read_pickle(self.maskDFPath)
     train, validate = train_test_split(maskDF, test_size=0.3, random_state=0,
                                        stratify=maskDF['mask'])
     self.trainDF = MaskDataset(train)
     self.validateDF = MaskDataset(validate)
     
     # Create weight vector for CrossEntropyLoss
     maskNum = maskDF[maskDF['mask']==1].shape[0]
     nonMaskNum = maskDF[maskDF['mask']==0].shape[0]
     nSamples = [nonMaskNum, maskNum]
     normedWeights = [1 - (x / sum(nSamples)) for x in nSamples]
     self.crossEntropyLoss = CrossEntropyLoss(weight=torch.tensor(normedWeights))
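The weight vector gives each class one minus its relative frequency, so the under-represented class receives the larger weight. A quick worked example with made-up counts:

# Worked example of the weighting above (counts are invented):
nSamples = [800, 200]  # [nonMaskNum, maskNum]
normedWeights = [1 - (x / sum(nSamples)) for x in nSamples]
# -> [0.2, 0.8]: the minority 'mask' class is weighted four times as heavily.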
Example #5
def train(args):
    dataset = MaskDataset(args.data_dir, args.num_jobs, training=True)
    tr_loader = data.DataLoader(dataset=dataset,
                                collate_fn=collate_func,
                                shuffle=True)
    dataset = MaskDataset(args.data_dir, args.num_jobs, training=False)
    dt_loader = data.DataLoader(dataset=dataset,
                                collate_fn=collate_func,
                                shuffle=True)

    estimator = EstimatorTrainer(513, args.checkout_dir, optimizer=args.optim,
                                 learning_rate=args.lr, resume_state=args.resume_state)
    estimator.train(tr_loader, dt_loader, epoch=args.epoch)
Example #6
def get_dataloader(path, transform, shuffle):
    dataset = MaskDataset(label_path=path, transform=transform)

    loader = DataLoader(dataset,
                        batch_size=batch_size,
                        num_workers=4,
                        shuffle=shuffle)

    return len(dataset), loader
Example #7
def get_data_loaders(val_files):
    val_transform = Compose([
        Resize((IMG_SIZE, IMG_SIZE)),
        ToTensor(),
    ])

    val_loader = DataLoader(MaskDataset(val_files, val_transform),
                            batch_size=1,
                            shuffle=False,
                            pin_memory=True,
                            num_workers=4)
    return val_loader
Example #8
        return acc, sum_loss
    else:
        return acc


if __name__ == '__main__':
    set_random_seed(170516)

    model = MaskChecker(MODEL).to(DEVICE)
    criterion = LabelSmoothingLoss(18, 0.2)  #nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     mode='min',
                                                     verbose=True)

    train_dataset = MaskDataset(train=True)
    train_loader = DataLoader(train_dataset,
                              batch_size=BATCH_SIZE,
                              num_workers=NUM_WORKERS,
                              shuffle=True)
    valid_dataset = MaskDataset(train=False)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=BATCH_SIZE,
                              num_workers=NUM_WORKERS,
                              shuffle=True)

    logger = SummaryWriter(log_dir='logs')

    best_valid_acc = 0
    fails = 0
    for epoch in range(NUM_EPOCHS):
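The example is cut off at the epoch loop. A minimal sketch of what such a loop might contain, reusing the evaluate helper from Example #9 and the best_valid_acc / fails counters set up above (train_one_epoch, the tolerance of 5, and the checkpoint path are assumptions, not code from the source):

# Hypothetical continuation; train_one_epoch, the tolerance value and the
# checkpoint path are assumptions, not part of the original code.
for epoch in range(NUM_EPOCHS):
    train_one_epoch(model, train_loader, criterion, optimizer)
    valid_acc, valid_loss = evaluate(model, valid_loader, return_loss=True)
    scheduler.step(valid_loss)  # ReduceLROnPlateau steps on the monitored metric
    logger.add_scalar('valid/acc', valid_acc, epoch)

    if valid_acc > best_valid_acc:
        best_valid_acc = valid_acc
        fails = 0
        torch.save(model, f'checkpoints/epoch{epoch}.pt')
    else:
        fails += 1
        if fails >= 5:
            break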
Example #9
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm

from dataset import MaskDataset, MaskTestDataset
from model import MaskChecker
from train import evaluate

DEVICE = torch.device('cuda:0')
NUM_WORKERS = 4

BATCH_SIZE = 16

MODEL_PATH = 'checkpoints/resnet34_lr0.0003_b16_tol5_2.pt'
RESULT_PATH = 'submission.csv'

valid_dataset = MaskDataset(train=False)
valid_loader = DataLoader(valid_dataset,
                          batch_size=BATCH_SIZE,
                          num_workers=NUM_WORKERS,
                          shuffle=True)
test_dataset = MaskTestDataset()
test_loader = DataLoader(test_dataset,
                         batch_size=BATCH_SIZE,
                         num_workers=NUM_WORKERS,
                         shuffle=False)

model = torch.load(MODEL_PATH).to(DEVICE)
model.eval()

print('validation')
acc = evaluate(model, valid_loader, return_loss=False)
Example #10
def train_model(config, wandb):

    seed_everything(config.seed)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model_module = getattr(import_module("model"), config.model)
    model = model_module(num_classes=18).to(device)

    #model = torch.nn.DataParallel(model)

    ########  DataSet

    transform = DataAugmentation(type=config.transform)  #center_384_1
    dataset = MaskDataset(config.data_dir, transform=transform)

    len_valid_set = int(config.data_ratio * len(dataset))
    len_train_set = len(dataset) - len_valid_set
    dataloaders, batch_num = {}, {}

    train_dataset, valid_dataset = torch.utils.data.random_split(
        dataset, [len_train_set, len_valid_set])
    if config.random_split == 0:
        print("tbd")

    sampler = None

    if config.sampler == 'ImbalancedDatasetSampler':
        sampler = ImbalancedDatasetSampler(train_dataset)

    use_cuda = torch.cuda.is_available()

    dataloaders['train'] = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.batch_size,
        sampler=sampler,
        shuffle=False,
        num_workers=4,
        pin_memory=use_cuda)

    dataloaders['valid'] = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.batch_size,
        shuffle=False,
        num_workers=4,
        pin_memory=use_cuda)

    batch_num['train'], batch_num['valid'] = len(dataloaders['train']), len(
        dataloaders['valid'])

    #Loss
    criterion = create_criterion(config.criterion)

    #Optimizer
    optimizer = optim.SGD(model.parameters(), lr=config.lr, momentum=0.9)

    if config.optim == "AdamP":
        optimizer = AdamP(model.parameters(),
                          lr=config.lr,
                          betas=(0.9, 0.999),
                          weight_decay=config.weight_decay)
    elif config.optim == "AdamW":
        optimizer = optim.AdamW(model.parameters(),
                                lr=config.lr,
                                weight_decay=config.weight_decay)

    #Scheduler
    # Decay LR by a factor of 0.1 every 7 epochs
    #exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)
    if config.lr_scheduler == "cosine":
        print('cosine')
        Q = math.floor(len(train_dataset) / config.batch_size +
                       1) * config.epochs / 7
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=Q)
        # CosineAnnealingWarmRestarts

    since = time.time()
    low_train = 0
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    train_loss, train_acc, valid_loss, valid_acc = [], [], [], []
    num_epochs = config.epochs
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'valid']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            running_loss, running_corrects, num_cnt = 0.0, 0, 0
            running_f1 = 0

            # Iterate over data.
            idx = 0
            for inputs, labels in dataloaders[phase]:
                idx += 1
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                    else:
                        running_f1 += f1_score(labels.data.detach().cpu(),
                                               preds.detach().cpu(),
                                               average='macro')

                # statistics
                val_loss = loss.item() * inputs.size(0)
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
                num_cnt += len(labels)
                if idx % 100 == 0:
                    _loss = loss.item() / config.batch_size
                    print(
                        f"Epoch[{epoch}/{config.epochs}]({idx}/{batch_num[phase]}) || "
                        f"{phase} loss {_loss:4.4} ")

            if phase == 'train':
                scheduler.step()

            epoch_loss = float(running_loss / num_cnt)
            epoch_acc = float(
                (running_corrects.double() / num_cnt).cpu() * 100)
            epoch_f1 = float(running_f1 / batch_num[phase])  # F1 is accumulated once per batch
            if phase == 'train':
                train_loss.append(epoch_loss)
                train_acc.append(epoch_acc)
                if config.wandb:
                    wandb.log({"Train acc": epoch_acc})
            else:
                valid_loss.append(epoch_loss)
                valid_acc.append(epoch_acc)
                if config.wandb:
                    wandb.log({"Valid acc": epoch_acc})
                    wandb.log({"F1 Score": epoch_f1})

            print('{} Loss: {:.2f} Acc: {:.1f} f1 :{:.3f}'.format(
                phase, epoch_loss, epoch_acc, epoch_f1))

            # deep copy the model
            if phase == 'valid':
                if epoch_acc > best_acc:
                    best_idx = epoch
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
                    print('==> best model saved - %d / %.1f' %
                          (best_idx, best_acc))
                    low_train = 0
                elif epoch_acc < best_acc:
                    print('==> model finish')
                    low_train += 1

        if low_train > 0 and epoch > 4:
            break

        if phase == 'valid':
            if epoch_acc < 80:
                print('Stop valid is so low')
                break

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best valid Acc: %d - %.1f' % (best_idx, best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    #torch.save(model.state_dict(), 'mask_model.pt')
    torch.save(model.state_dict(), config.name + '.pt')
    print('model saved')
    if config.wandb:
        wandb.finish()
    return model, best_idx, best_acc, train_loss, train_acc, valid_loss, valid_acc
Example #11
parser.add_argument('--model_config')
parser.add_argument('--batch_size', default=32, type=int)
parser.add_argument('--learning_rate', default=1e-3, type=float)
parser.add_argument('--num_epoch', default=10, type=int)
parser.add_argument('--device', default='cpu')
parser.add_argument('--log_dir', default='logs')
parser.add_argument('--weight_dir', default='weight')

if __name__ == "__main__":
    args = parser.parse_args()

    print("Load vocab")
    tokenizer = load_tokenizer(args.src_vocab, args.tgt_vocab)

    print("Prepare data")
    train_ds = MaskDataset(args.train_file, tokenizer)
    test_ds = MaskDataset(args.test_file, tokenizer, use_mask=False)
    train_dl = DataLoader(train_ds, shuffle=True, batch_size=args.batch_size)
    test_dl = DataLoader(test_ds, shuffle=False, batch_size=args.batch_size)

    print("Init model")
    src_vocab_len = len(tokenizer.src_stoi)
    tgt_vocab_len = len(tokenizer.tgt_stoi)

    if args.model_config:
        with open(args.model_config) as f:
            config = json.load(f)
    else:
        config = {}

    model = Model(src_vocab_len, tgt_vocab_len, **config)
Example #12
 def eval_masks(self):
     QS = Dataset(self.QSD2_W1, masking=True)
     GT = MaskDataset(self.QSD2_W1)
     eval_masks(QS, GT)