Example #1
def main(args):
    writer = MlflowWriter(args.exp_name)
    writer = write_log_base(args, writer)
    logger = CustomMlFlowLogger(writer)

    pl.seed_everything(args.seed)
    model = mobilenet_v2(pretrained=True, num_classes=args.num_classes)
    datamodule = AnimeFaceDataModule(args)
    criterion = nn.CrossEntropyLoss()
    plmodel = ImageClassifier(args, model, criterion)
    trainer = pl.Trainer(
        logger=logger,
        checkpoint_callback=False,
        gpus=2,
        max_epochs=args.epochs,
        flush_logs_every_n_steps=args.print_freq,
        log_every_n_steps=args.log_freq,
        accelerator="dp",
        precision=16 if args.apex else 32,
        deterministic=True,
        num_sanity_val_steps=-1,
    )
    starttime = time.time()  # measure elapsed time (wall clock)
    trainer.fit(plmodel, datamodule=datamodule)
    trainer.test(plmodel, datamodule=datamodule, verbose=True)
    writer.move_mlruns()
    # print elapsed time
    endtime = time.time()
    interval = endtime - starttime
    print("elapsed time = {0:d}h {1:d}m {2:d}s".format(
        int(interval / 3600),
        int((interval % 3600) / 60),
        int((interval % 3600) % 60),
    ))
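A minimal, hedged sketch of how this entry point might be launched. The argument names below are inferred from the function body and the defaults are placeholders; MlflowWriter, write_log_base, CustomMlFlowLogger, AnimeFaceDataModule and ImageClassifier come from the surrounding project and are not shown here.

# Hypothetical launcher for main() above; argument names inferred from the body, defaults are placeholders.
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--exp_name", default="mobilenet_v2_baseline")
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--num_classes", type=int, default=10)
    parser.add_argument("--epochs", type=int, default=30)
    parser.add_argument("--print_freq", type=int, default=100)
    parser.add_argument("--log_freq", type=int, default=10)
    parser.add_argument("--apex", action="store_true")  # switches the Trainer to 16-bit precision
    main(parser.parse_args())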
Example #2
def dump_features(data_dir, features_dir):
    with open(os.path.join(data_dir, 'labels.txt'), 'r') as fp:
        labels = [line.strip() for line in fp.readlines()]
    label_to_index = {label: index for index, label in enumerate(labels)}

    model = mobilenet_v2()
    model.trainable = False

    for category in sorted(os.listdir(data_dir)):
        if not os.path.isdir(os.path.join(data_dir, category)):
            continue
        print(f'calculating features for {category} data...')

        features, label = [], []
        for root, dirs, files in os.walk(os.path.join(data_dir, category)):
            if not files:
                continue
            print(root)
            for filename in files:
                image = tf.io.read_file(os.path.join(root, filename))
                image = tf.io.decode_image(image, channels=3)
                image = tf.image.convert_image_dtype(image, dtype=tf.float32)
                features.append(
                    model(tf.expand_dims(image, axis=0)).numpy().flatten())
                label.append(label_to_index[os.path.basename(root)])
        np.savez(os.path.join(features_dir, f'{category}.npz'),
                 inputs=features,
                 targets=label)
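A short usage sketch, assuming data_dir contains labels.txt plus one sub-directory per split (e.g. training/, validation/) with one folder per label inside, and that mobilenet_v2() returns a Keras feature extractor producing pooled 1280-dimensional vectors. The paths are placeholders.

# Hedged usage sketch for dump_features() above; paths are placeholders.
import os

data_dir = 'data'          # expects data/labels.txt and data/<split>/<label>/<image files>
features_dir = 'features'
os.makedirs(features_dir, exist_ok=True)
dump_features(data_dir, features_dir)  # writes features/<split>.npz with 'inputs' and 'targets' arrays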
Example #3
def train(data_dir, features_dir, weights_dir, batch_size=32):
    if len(glob.glob(os.path.join(features_dir, '*.npz'))) == 0:
        os.makedirs(features_dir, exist_ok=True)
        dump_features(data_dir, features_dir)

    def dataset(category):
        npz = np.load(os.path.join(features_dir, f'{category}.npz'))
        inputs = npz['inputs']
        targets = npz['targets']
        size = inputs.shape[0]
        return tf.data.Dataset.from_tensor_slices(
            (inputs, targets)).shuffle(size), size

    training_data, training_size = dataset('training')
    validation_data, validation_size = dataset('validation')

    with open(os.path.join(data_dir, 'labels.txt')) as fp:
        labels = [line.strip() for line in fp.readlines()]
    classes = len(labels)

    model = tf.keras.Sequential([
        tf.keras.layers.InputLayer((1280, )),
        tf.keras.layers.Dropout(rate=0.2),
        tf.keras.layers.Dense(
            classes,
            activation='softmax',
            kernel_regularizer=tf.keras.regularizers.l2(1e-4)),
    ])
    model.summary()
    model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
    history = model.fit(training_data.repeat().batch(batch_size),
                        steps_per_epoch=training_size // batch_size,
                        epochs=100,
                        validation_data=validation_data.batch(batch_size),
                        validation_steps=validation_size // batch_size,
                        callbacks=[tf.keras.callbacks.TensorBoard()])
    print(history.history)

    model.save_weights(os.path.join(weights_dir, 'transfer_weights.h5'))
    classifier = tf.keras.Sequential([
        mobilenet_v2(),
        model,
    ])
    classifier.trainable = False
    classifier.save('transfer_classifier.h5')
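A hedged sketch of driving this routine and reusing the exported end-to-end classifier. Note that model.compile() above falls back to Keras' default rmsprop optimizer because none is given, and the saved transfer_classifier.h5 expects raw float images since the MobileNetV2 feature extractor is prepended. Paths, image size and value range below are assumptions, not values from the original project.

# Hedged usage sketch; paths, image size and value range are placeholders.
import numpy as np
import tensorflow as tf

train(data_dir='data', features_dir='features', weights_dir='weights', batch_size=32)

classifier = tf.keras.models.load_model('transfer_classifier.h5')
images = np.random.rand(4, 224, 224, 3).astype(np.float32)  # stand-in for real images in [0, 1]
predicted = classifier.predict(images).argmax(axis=-1)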
Example #4
def main():
    logger, result_dir, _ = utils.config_backup_get_log(args, __file__)

    device = utils.get_device()
    utils.set_seed(args.seed, device)  # set random seed

    dataset = COVID19DataSet(root=args.datapath,
                             ctonly=args.ctonly)  # load dataset
    trainset, testset = split_dataset(dataset=dataset, logger=logger)

    if args.model.lower() in ['mobilenet']:
        net = mobilenet_v2(task='classification',
                           moco=False,
                           ctonly=args.ctonly).to(device)
    elif args.model.lower() in ['densenet']:
        net = densenet121(task='classification',
                          moco=False,
                          ctonly=args.ctonly).to(device)
    else:
        raise ValueError('unknown model: {}'.format(args.model))

    criterion = torch.nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(net.parameters(),
                                 lr=args.lr,
                                 weight_decay=1e-3)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=25,
                                                gamma=0.1)
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=args.bstrain,
                                              shuffle=True,
                                              num_workers=args.nworkers)
    testloader = torch.utils.data.DataLoader(testset,
                                             batch_size=args.bstest,
                                             shuffle=False,
                                             num_workers=args.nworkers)

    best_auroc = 0.
    print('==> Start training ..')
    start = time.time()
    for epoch in range(args.maxepoch):
        net = train(epoch, net, trainloader, criterion, optimizer, scheduler,
                    args.model, device)
        scheduler.step()
        if epoch % 5 == 0:
            auroc, aupr, f1_score, accuracy = validate(net, testloader, device)
            logger.write(
                'Epoch:%3d | AUROC: %5.4f | AUPR: %5.4f | F1_Score: %5.4f | Accuracy: %5.4f\n'
                % (epoch, auroc, aupr, f1_score, accuracy))
            if auroc > best_auroc:
                best_auroc = auroc
                best_aupr = aupr
                best_epoch = epoch
                print("save checkpoint...")
                torch.save(net.state_dict(),
                           './%s/%s.pth' % (result_dir, args.model))

    auroc, aupr, f1_score, accuracy = validate(net, testloader, device)
    logger.write(
        'Epoch:%3d | AUROC: %5.4f | AUPR: %5.4f | F1_Score: %5.4f | Accuracy: %5.4f\n'
        % (epoch, auroc, aupr, f1_score, accuracy))

    if args.batchout:
        with open('temp_result.txt', 'w') as f:
            f.write("%10.8f\n" % (best_auroc))
            f.write("%10.8f\n" % (best_aupr))
            f.write("%d" % (best_epoch))

    end = time.time()
    hours, rem = divmod(end - start, 3600)
    minutes, seconds = divmod(rem, 60)
    print("Elapsed Time: {:0>2}:{:0>2}:{:05.2f}".format(
        int(hours), int(minutes), seconds))
    logger.write("Elapsed Time: {:0>2}:{:0>2}:{:05.2f}\n".format(
        int(hours), int(minutes), seconds))
    return True
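The project's validate() is not shown here. Purely as a hedged illustration, binary AUROC and AUPR for a classifier trained with BCEWithLogitsLoss, like the one above, could be computed along these lines with scikit-learn; it assumes net(inputs) returns one logit per sample.

# Illustrative only, not the repository's validate(); assumes one logit per sample.
import torch
from sklearn.metrics import roc_auc_score, average_precision_score

@torch.no_grad()
def compute_auroc_aupr(net, loader, device):
    net.eval()
    scores, targets = [], []
    for inputs, labels in loader:
        logits = net(inputs.to(device))
        scores.append(torch.sigmoid(logits).cpu().flatten())   # probabilities for the positive class
        targets.append(labels.float().cpu().flatten())
    scores = torch.cat(scores).numpy()
    targets = torch.cat(targets).numpy()
    return roc_auc_score(targets, scores), average_precision_score(targets, scores)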
Example #5
def main():
    args.task_selection = args.task_selection.split(',')

    torch.manual_seed(args.seed)

    # LOAD DATASET
    stat_file = args.stat_file
    with open(stat_file, 'r') as f:
        data = pickle.load(f)
        mean, std = data['mean'], data['std']
        mean = [float(m) for m in mean]
        std = [float(s) for s in std]
    normalize = transforms.Normalize(mean=mean, std=std)
    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(90),
        transforms.ToTensor(),
        normalize,
    ])
    val_transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                normalize,
    ])

    if not args.shape_dataset:
        if args.task_selection is not None:
            classes = args.task_selection
        elif args.office_dataset:
            classes = ['style', 'genre']
        elif args.bam_dataset:
            classes = ['content', 'emotion', 'media']
        else:
            classes = ['artist_name', 'genre', 'style', 'technique', 'century']
        valset = Wikiart(path_to_info_file=args.val_file, path_to_images=args.im_path,
                         classes=classes, transform=val_transform)
        trainset = Wikiart(path_to_info_file=args.train_file, path_to_images=args.im_path,
                           classes=classes, transform=train_transform)
    else:
        if args.task_selection is not None:
            classes = args.task_selection
        else:
            classes = ['shape', 'n_shapes', 'color_shape', 'color_background']
        valset = ShapeDataset(root_dir='/export/home/kschwarz/Documents/Data/Geometric_Shapes', split='val',
                              classes=classes, transform=val_transform)
        trainset = ShapeDataset(root_dir='/export/home/kschwarz/Documents/Data/Geometric_Shapes', split='train',
                                classes=classes, transform=train_transform)

    if trainset.labels_to_ints != valset.labels_to_ints:
        print('Validation and training set label-to-int mappings do not match; using the training set mapping.')
        print(trainset.labels_to_ints, valset.labels_to_ints)
        valset.labels_to_ints = trainset.labels_to_ints.copy()

    num_labels = [len(trainset.labels_to_ints[c]) for c in classes]

    # PARAMETERS
    use_cuda = args.use_gpu and torch.cuda.is_available()
    device_nb = args.device
    if use_cuda:
        torch.cuda.set_device(device_nb)
        torch.cuda.manual_seed_all(args.seed)

    # INITIALIZE NETWORK
    if args.model.lower() not in ['mobilenet_v2', 'vgg16_bn']:
        raise NotImplementedError('Unknown Model {}\n\t+ Choose from: [mobilenet_v2, vgg16_bn].'
                                  .format(args.model))
    elif args.model.lower() == 'mobilenet_v2':
        featurenet = mobilenet_v2(pretrained=True)
    elif args.model.lower() == 'vgg16_bn':
        featurenet = vgg16_bn(pretrained=True)
    if args.not_narrow:
        bodynet = featurenet
    else:
        bodynet = narrownet(featurenet, dim_feature_out=args.feature_dim)
    net = OctopusNet(bodynet, n_labels=num_labels)
    n_parameters = sum([p.data.nelement() for p in net.parameters() if p.requires_grad])
    if use_cuda:
        net = net.cuda()
    print('Using {}\n\t+ Number of params: {}'.format(str(bodynet).split('(')[0], n_parameters))

    # LOG/SAVE OPTIONS
    log_interval = args.log_interval
    log_dir = args.log_dir
    if not os.path.isdir(log_dir):
        os.makedirs(log_dir)

    # tensorboard summary writer
    timestamp = time.strftime('%m-%d-%H-%M')
    if args.shape_dataset:
        expname = timestamp + '_ShapeDataset_' + str(bodynet).split('(')[0]
    else:
        expname = timestamp + '_' + str(bodynet).split('(')[0]
    if args.exp_name is not None:
        expname = expname + '_' + args.exp_name
    log = TBPlotter(os.path.join(log_dir, 'tensorboard', expname))
    log.print_logdir()

    # allow auto-tuner to find best algorithm for the hardware
    cudnn.benchmark = True

    write_config(args, os.path.join(log_dir, expname))

    # INITIALIZE TRAINING
    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=10, threshold=1e-1, verbose=True)
    criterion = nn.CrossEntropyLoss()
    if use_cuda:
        criterion = criterion.cuda()

    kwargs = {'num_workers': 8, 'pin_memory': True} if use_cuda else {}
    trainloader = DataLoader(trainset, batch_size=args.batch_size, shuffle=True, **kwargs)
    valloader = DataLoader(valset, batch_size=args.batch_size, shuffle=True, **kwargs)

    # optionally resume from a checkpoint
    start_epoch = 1
    if args.chkpt is not None:
        if os.path.isfile(args.chkpt):
            print("=> loading checkpoint '{}'".format(args.chkpt))
            checkpoint = torch.load(args.chkpt, map_location=lambda storage, loc: storage)
            start_epoch = checkpoint['epoch']
            best_acc_score = checkpoint['best_acc_score']
            best_acc = checkpoint['acc']
            net.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            scheduler.load_state_dict(checkpoint['scheduler'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.chkpt, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.chkpt))

    def train(epoch):
        losses = AverageMeter()
        accs = AverageMeter()
        class_acc = [AverageMeter() for i in range(len(classes))]

        # switch to train mode
        net.train()
        for batch_idx, (data, target) in enumerate(trainloader):
            if use_cuda:
                data, target = Variable(data.cuda()), [Variable(t.cuda()) for t in target]
            else:
                data, target = Variable(data), [Variable(t) for t in target]

            # compute output
            outputs = net(data)
            preds = [torch.max(outputs[i], 1)[1] for i in range(len(classes))]

            loss = Variable(torch.Tensor([0])).type_as(data[0])
            for i, o, t, p in zip(range(len(classes)), outputs, target, preds):
                # mask out samples with missing labels (encoded as -1)
                mask = t != -1
                if mask.sum() == 0:
                    continue
                o, t, p = o[mask], t[mask], p[mask]
                loss += criterion(o, t)
                # measure class accuracy and record loss
                class_acc[i].update((torch.sum(p == t).type(torch.FloatTensor) / t.size(0)).data)
            accs.update(torch.mean(torch.stack([class_acc[i].val for i in range(len(classes))])), target[0].size(0))
            losses.update(loss.data, target[0].size(0))

            # compute gradient and do optimizer step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if batch_idx % log_interval == 0:
                print('Train Epoch: {} [{}/{}]\t'
                      'Loss: {:.4f} ({:.4f})\t'
                      'Acc: {:.2f}% ({:.2f}%)'.format(
                          epoch, batch_idx * len(target), len(trainloader.dataset),
                          float(losses.val), float(losses.avg),
                          float(accs.val) * 100., float(accs.avg) * 100.))
                print('\t' + '\n\t'.join(['{}: {:.2f}%'.format(classes[i], float(class_acc[i].val) * 100.)
                                          for i in range(len(classes))]))

        # log average epoch values to tensorboard
        log.write('loss', float(losses.avg), epoch, test=False)
        log.write('acc', float(accs.avg), epoch, test=False)
        for i in range(len(classes)):
            log.write('class_acc', float(class_acc[i].avg), epoch, test=False)

    def test(epoch):
        losses = AverageMeter()
        accs = AverageMeter()
        class_acc = [AverageMeter() for i in range(len(classes))]

        # switch to evaluation mode
        net.eval()
        for batch_idx, (data, target) in enumerate(valloader):
            if use_cuda:
                data, target = Variable(data.cuda()), [Variable(t.cuda()) for t in target]
            else:
                data, target = Variable(data), [Variable(t) for t in target]

            # compute output
            outputs = net(data)
            preds = [torch.max(outputs[i], 1)[1] for i in range(len(classes))]

            loss = Variable(torch.Tensor([0])).type_as(data[0])
            for i, o, t, p in zip(range(len(classes)), outputs, target, preds):
                # mask out samples with missing labels (encoded as -1)
                mask = t != -1
                if mask.sum() == 0:
                    continue
                o, t, p = o[mask], t[mask], p[mask]
                loss += criterion(o, t)
                # measure class accuracy and record loss
                class_acc[i].update((torch.sum(p == t).type(torch.FloatTensor) / t.size(0)).data)
            accs.update(torch.mean(torch.stack([class_acc[i].val for i in range(len(classes))])), target[0].size(0))
            losses.update(loss.data, target[0].size(0))

        score = accs.avg - torch.std(torch.stack([class_acc[i].avg for i in range(
            len(classes))])) / accs.avg  # compute mean - std/mean as measure for accuracy
        print('\nVal set: Average loss: {:.4f} Average acc {:.2f}% Acc score {:.2f} LR: {:.6f}'
              .format(float(losses.avg), float(accs.avg) * 100., float(score), optimizer.param_groups[-1]['lr']))
        print('\t' + '\n\t'.join(['{}: {:.2f}%'.format(classes[i], float(class_acc[i].avg) * 100.)
                                  for i in range(len(classes))]))
        log.write('loss', float(losses.avg), epoch, test=True)
        log.write('acc', float(accs.avg), epoch, test=True)
        for i in range(len(classes)):
            log.write('class_acc', float(class_acc[i].avg), epoch, test=True)
        return losses.avg.cpu().numpy(), float(score), float(accs.avg), [float(class_acc[i].avg) for i in
                                                                         range(len(classes))]

    if start_epoch == 1:  # compute baseline
        _, best_acc_score, best_acc, _ = test(epoch=0)
    else:  # checkpoint was loaded: best_acc_score and best_acc were restored above
        pass

    for epoch in range(start_epoch, args.epochs + 1):
        # train for one epoch
        train(epoch)
        # evaluate on validation set
        val_loss, val_acc_score, val_acc, val_class_accs = test(epoch)
        scheduler.step(val_loss)

        # remember best acc and save checkpoint
        is_best = val_acc_score > best_acc_score
        best_acc_score = max(val_acc_score, best_acc_score)
        save_checkpoint({
            'epoch': epoch,
            'state_dict': net.state_dict(),
            'optimizer': optimizer.state_dict(),
            'scheduler': scheduler.state_dict(),
            'best_acc_score': best_acc_score,
            'acc': val_acc,
            'class_acc': {c: a for c, a in zip(classes, val_class_accs)}
        }, is_best, expname, directory=log_dir)

        if val_acc > best_acc:
            shutil.copyfile(os.path.join(log_dir, expname + '_checkpoint.pth.tar'),
                            os.path.join(log_dir, expname + '_model_best_mean_acc.pth.tar'))
        best_acc = max(val_acc, best_acc)

        if optimizer.param_groups[-1]['lr'] < 1e-5:
            print('Learning rate reached minimum threshold. End training.')
            break

    # report best values
    try:
        best = torch.load(os.path.join(log_dir, expname + '_model_best.pth.tar'), map_location=lambda storage, loc: storage)
    except IOError:         # could be only one task
        best = torch.load(os.path.join(log_dir, expname + '_model_best_mean_acc.pth.tar'), map_location=lambda storage, loc: storage)
    print('Finished training after epoch {}:\n\tbest acc score: {}\n\tacc: {}\n\t class acc: {}'
          .format(best['epoch'], best['best_acc_score'], best['acc'], best['class_acc']))
    print('Best model mean accuracy: {}'.format(best_acc))

    try:
        shutil.copyfile(os.path.join(log_dir, expname + '_model_best.pth.tar'),
                        os.path.join('models', expname + '_model_best.pth.tar'))
    except IOError:  # could be only one task
        shutil.copyfile(os.path.join(log_dir, expname + '_model_best_mean_acc.pth.tar'),
                        os.path.join('models', expname + '_model_best.pth.tar'))
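Several helpers used above (AverageMeter, TBPlotter, save_checkpoint, write_config, narrownet, OctopusNet, Wikiart, ShapeDataset) belong to the project itself and are not shown. The AverageMeter interface relied on here (.update(val, n), .val, .avg) typically looks like the following sketch, not copied from the project:

# Typical AverageMeter implementation, sketched for reference only.
class AverageMeter(object):
    """Keeps the latest value and a running average."""

    def __init__(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count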
Example #6
def main():
    # LOAD DATASET
    stat_file = args.stat_file
    with open(stat_file, 'r') as f:
        data = pickle.load(f)
        mean, std = data['mean'], data['std']
        mean = [float(m) for m in mean]
        std = [float(s) for s in std]
    normalize = transforms.Normalize(mean=mean, std=std)
    img_transform = transforms.Compose(
        [transforms.Resize((224, 224)),
         transforms.ToTensor(), normalize])

    if args.shape_dataset:
        classes = ['shape']
        dataset = ShapeDataset(
            root_dir='/export/home/kschwarz/Documents/Data/Geometric_Shapes',
            split=args.info_file,
            classes=classes,
            transform=img_transform)
    elif args.stl_dataset:
        dataset = STL(transform=img_transform,
                      test='test' in args.info_file.split('/')[-1])
    else:
        dataset = Wikiart(path_to_info_file=args.info_file,
                          path_to_images=args.im_path,
                          classes=['image_id'],
                          transform=img_transform)

    # PARAMETERS
    use_cuda = args.use_gpu and torch.cuda.is_available()
    device_nb = args.device
    if use_cuda:
        torch.cuda.set_device(device_nb)

    # INITIALIZE NETWORK
    if args.model.lower() not in ['mobilenet_v2', 'vgg16_bn']:
        raise NotImplementedError(
            'Unknown Model {}\n\t+ Choose from: [mobilenet_v2, vgg16_bn].'.
            format(args.model))
    elif args.model.lower() == 'mobilenet_v2':
        featurenet = mobilenet_v2(pretrained=True)
    elif args.model.lower() == 'vgg16_bn':
        featurenet = vgg16_bn(pretrained=True)
    if args.not_narrow:
        net = featurenet
    else:
        net = narrownet(featurenet, dim_feature_out=args.feature_dim)
    if use_cuda:
        net = net.cuda()

    if args.weight_file is not None:
        remove_fc(net, inplace=True)
    else:
        make_featurenet(net, inplace=True)
    print('Extract features using {}.'.format(str(net)))

    if args.weight_file:
        pretrained_dict = load_weights(args.weight_file,
                                       net.state_dict(),
                                       prefix_file='bodynet.')
        net.load_state_dict(pretrained_dict)

    if use_cuda:
        net = net.cuda()

    kwargs = {'num_workers': 8} if use_cuda else {}
    loader = torch.utils.data.DataLoader(dataset,
                                         batch_size=args.batch_size,
                                         shuffle=False,
                                         **kwargs)

    net.eval()
    features = []
    for i, data in enumerate(loader):
        if isinstance(data, (tuple, list)):  # loader returns (data, label)
            data = data[0]
        if (i + 1) % 10 == 0:
            print('{}/{}'.format(i + 1, len(loader)))
        input = Variable(data,
                         requires_grad=False) if not use_cuda else Variable(
                             data.cuda(), requires_grad=False)
        output = net(input)
        features.append(output.data.cpu())

    features = torch.cat(features)
    features = features.numpy()
    image_names = dataset.df['image_id'].values.astype(str)

    if not os.path.isdir(args.output_dir):
        os.makedirs(args.output_dir)
    expname = '' if args.exp_name is None else '_' + args.exp_name
    if args.shape_dataset:
        outfile = os.path.join(
            args.output_dir, 'ShapeDataset_' + str(net).split('(')[0] + '_' +
            args.info_file.split('/')[-1].split('.')[0] + expname + '.hdf5')
    elif args.office_dataset:
        outfile = os.path.join(
            args.output_dir, 'OfficeDataset_' + str(net).split('(')[0] + '_' +
            args.info_file.split('/')[-1].split('.')[0] + expname + '.hdf5')
    elif args.bam_dataset:
        outfile = os.path.join(
            args.output_dir, 'BAMDataset_' + str(net).split('(')[0] + '_' +
            args.info_file.split('/')[-1].split('.')[0] + expname + '.hdf5')
    elif args.stl_dataset:
        outfile = os.path.join(
            args.output_dir, 'STLDataset_' + str(net).split('(')[0] + '_' +
            args.info_file.split('/')[-1].split('.')[0] + expname + '.hdf5')
    else:
        outfile = os.path.join(
            args.output_dir,
            str(net).split('(')[0] + '_' +
            args.info_file.split('/')[-1].split('.')[0] + expname + '.hdf5')

    with h5py.File(outfile, 'w') as f:
        f.create_dataset('features',
                         features.shape,
                         dtype=features.dtype,
                         data=features)
        f.create_dataset('image_names',
                         image_names.shape,
                         dtype=image_names.dtype,
                         data=image_names)
    print('Saved features to {}'.format(outfile))
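A hedged sketch of reading the exported features back; the file name below is a placeholder, and the image names are decoded defensively because h5py may return them as bytes.

# Hedged read-back sketch; the file name is a placeholder.
import h5py

with h5py.File('output/MobileNetV2_info_file.hdf5', 'r') as f:
    features = f['features'][()]
    image_names = [n.decode() if isinstance(n, bytes) else str(n) for n in f['image_names'][()]]
print(features.shape, len(image_names))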