Example #1
def main(args):
    if not os.path.exists(args.config):
        raise FileNotFoundError('provided config file does not exist: %s' %
                                args.config)

    with open(args.config, "r") as config_file:
        config_yaml = yaml.load(config_file, Loader=yaml.FullLoader)

    config_yaml['logger_name'] = 'onnx'
    config = SimCLRConfig(config_yaml)

    if not os.path.exists(config.base.output_dir_path):
        os.mkdir(config.base.output_dir_path)

    if not os.path.exists(config.base.log_dir_path):
        os.makedirs(config.base.log_dir_path)

    logger = setup_logger(config.base.logger_name, config.base.log_file_path)
    logger.info('using config: %s' % config)

    if not os.path.exists(args.model):
        raise FileNotFoundError('provided model directory does not exist: %s' %
                                args.model)
    else:
        logger.info('using model directory: %s' % args.model)

    config.onnx.model_path = args.model
    logger.info('using model_path: {}'.format(config.onnx.model_path))

    config.onnx.epoch_num = args.epoch_num
    logger.info('using epoch_num: {}'.format(config.onnx.epoch_num))

    model_file_path = Path(config.onnx.model_path).joinpath(
        'checkpoint_{}.pth'.format(config.onnx.epoch_num))
    if not os.path.exists(model_file_path):
        raise FileNotFoundError('model file does not exist: %s' %
                                model_file_path)
    else:
        logger.info('using model file: %s' % model_file_path)

    train_dataset, val_dataset, test_dataset, classes = Datasets.get_datasets(
        config)
    num_classes = len(classes)

    train_loader, val_loader, test_loader = Datasets.get_loaders(
        config, train_dataset, val_dataset, test_dataset)

    torch_model = load_torch_model(config, num_classes)

    val_acc, test_acc = test_pt_model(config, torch_model, val_dataset,
                                      test_dataset, val_loader, test_loader)
    logger.info('torch model performance -> val_acc: {}, test_acc: {}'.format(
        val_acc, test_acc))

    # move the model to CPU before export so the traced graph does not capture CUDA tensors
    torch_model = torch_model.to(torch.device('cpu'))
    onnx_model_file_path = save_onnx_model(torch_model,
                                           num_classes=num_classes,
                                           config=config,
                                           current_epoch=config.onnx.epoch_num)

    onnx_model = load_onnx_model(config, onnx_model_file_path)
    if onnx_model:
        logger.info('loaded onnx_model: {}'.format(onnx_model_file_path))

    val_acc, test_acc = test_onnx_model(config, onnx_model_file_path,
                                        val_dataset, test_dataset, val_loader,
                                        test_loader)
    logger.info('onnx model performance -> val_acc: {}, test_acc: {}'.format(
        val_acc, test_acc))
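
The save_onnx_model helper is not shown above. As a minimal sketch of what that export step typically looks like with torch.onnx.export (the dummy input resolution and output file naming here are assumptions, not the repository's actual code):

import torch

def save_onnx_model(torch_model, num_classes, config, current_epoch):
    # num_classes is kept to match the call site above; this sketch does not use it
    torch_model.eval()
    # assumed input shape; use the resolution the model was trained with
    dummy_input = torch.randn(1, 3, 224, 224, device='cpu')
    onnx_file_path = '{}/checkpoint_{}.onnx'.format(config.onnx.model_path, current_epoch)
    # trace the model once with the dummy input and serialize the graph
    torch.onnx.export(torch_model,
                      dummy_input,
                      onnx_file_path,
                      input_names=['input'],
                      output_names=['output'],
                      dynamic_axes={'input': {0: 'batch'}, 'output': {0: 'batch'}})
    return onnx_file_path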
Example #2
def main(args):
    if not os.path.exists(args.config):
        raise FileNotFoundError('provided config file does not exist: %s' % args.config)

    with open(args.config, "r") as config_file:
        config_yaml = yaml.load(config_file, Loader=yaml.FullLoader)

    if 'restart_log_dir_path' not in config_yaml['simclr']['train'].keys():
        config_yaml['simclr']['train']['restart_log_dir_path'] = None

    if args.data_dir_path is not None:
        config_yaml['simclr']['train']['data_dir_path'] = args.data_dir_path
        print('overriding data_dir_path: {}'.format(args.data_dir_path))

    config_yaml['logger_name'] = 'logreg'
    config = SimCLRConfig(config_yaml)

    if not os.path.exists(config.base.output_dir_path):
        os.mkdir(config.base.output_dir_path)

    if not os.path.exists(config.base.log_dir_path):
        os.makedirs(config.base.log_dir_path)

    logger = setup_logger(config.base.logger_name, config.base.log_file_path)
    logger.info('using config: %s' % config)

    config_copy_file_path = os.path.join(config.base.log_dir_path, 'config.yaml')
    shutil.copy(args.config, config_copy_file_path)

    writer = SummaryWriter(log_dir=config.base.log_dir_path)

    if not os.path.exists(args.model):
        raise FileNotFoundError('provided model directory does not exist: %s' % args.model)
    else:
        logger.info('using model directory: %s' % args.model)

    config.logistic_regression.model_path = args.model
    logger.info('using model_path: {}'.format(config.logistic_regression.model_path))

    config.logistic_regression.epoch_num = args.epoch_num
    logger.info('using epoch_num: {}'.format(config.logistic_regression.epoch_num))

    model_file_path = Path(config.logistic_regression.model_path).joinpath(
        'checkpoint_{}.pth'.format(config.logistic_regression.epoch_num))
    if not os.path.exists(model_file_path):
        raise FileNotFoundError('model file does not exist: %s' % model_file_path)
    else:
        logger.info('using model file: %s' % model_file_path)

    train_dataset, val_dataset, test_dataset, classes = Datasets.get_datasets(config,
                                                                              img_size=config.logistic_regression.img_size)
    num_classes = len(classes)

    train_loader, val_loader, test_loader = Datasets.get_loaders(config, train_dataset, val_dataset, test_dataset)

    simclr_model = load_simclr_model(config)
    simclr_model = simclr_model.to(config.base.device)
    # the encoder is only used for feature extraction, so disable dropout/batch-norm updates
    simclr_model.eval()

    model = LogisticRegression(simclr_model.num_features, num_classes)
    model = model.to(config.base.device)

    learning_rate = config.logistic_regression.learning_rate
    momentum = config.logistic_regression.momentum
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum, nesterov=True)
    criterion = torch.nn.CrossEntropyLoss()

    logger.info("creating features from pre-trained context model")
    (train_x, train_y, test_x, test_y) = get_features(
        config, simclr_model, train_loader, test_loader
    )

    # the encoder is frozen, so features are computed once and wrapped in plain tensor loaders
    feature_train_loader, feature_test_loader = get_data_loaders(
        config, train_x, train_y, test_x, test_y
    )

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    best_epoch = 0
    best_loss = 0

    for epoch in range(config.logistic_regression.epochs):
        loss_epoch, accuracy_epoch = train(
            config, feature_train_loader, model, criterion, optimizer
        )

        # average over the batches that were actually iterated (the feature loader,
        # not the original image loader, whose batch count may differ)
        loss = loss_epoch / len(feature_train_loader)
        accuracy = accuracy_epoch / len(feature_train_loader)

        writer.add_scalar("Loss/train_epoch", loss, epoch)
        writer.add_scalar("Accuracy/train_epoch", accuracy, epoch)
        logger.info(
            "epoch [%3.i|%i] -> train loss: %f, accuracy: %f" % (
                epoch + 1, config.logistic_regression.epochs, loss, accuracy)
        )

        if accuracy > best_acc:
            best_loss = loss
            best_epoch = epoch + 1
            best_acc = accuracy
            best_model_wts = copy.deepcopy(model.state_dict())

    model.load_state_dict(best_model_wts)
    logger.info(
        "train dataset performance -> best epoch: {}, loss: {}, accuracy: {}".format(
            best_epoch, best_loss, best_acc)
    )

    loss_epoch, accuracy_epoch = test(
        config, feature_test_loader, model, criterion
    )

    loss = loss_epoch / len(feature_test_loader)
    accuracy = accuracy_epoch / len(feature_test_loader)
    logger.info(
        "test dataset performance -> best epoch: {}, loss: {}, accuracy: {}".format(best_epoch, loss, accuracy)
    )
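
The LogisticRegression class used above is a linear probe trained on frozen SimCLR features. A minimal sketch of such a module, assuming the two-argument constructor seen at the call site:

import torch.nn as nn

class LogisticRegression(nn.Module):
    def __init__(self, n_features, n_classes):
        super().__init__()
        # a single linear layer mapping encoder features to class logits
        self.model = nn.Linear(n_features, n_classes)

    def forward(self, x):
        # return raw logits; CrossEntropyLoss applies log-softmax internally
        return self.model(x)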
Example #3
def main(args):
    if not os.path.exists(args.config):
        raise FileNotFoundError(
            'provided config file does not exist: {}'.format(args.config))

    with open(args.config, "r") as config_file:
        config_yaml = yaml.load(config_file, Loader=yaml.FullLoader)

    if 'restart_log_dir_path' not in config_yaml['simclr']['train'].keys():
        config_yaml['simclr']['train']['restart_log_dir_path'] = None

    config_yaml['logger_name'] = 'classification'
    config = SimCLRConfig(config_yaml)

    if not os.path.exists(config.base.output_dir_path):
        os.mkdir(config.base.output_dir_path)

    if not os.path.exists(config.base.log_dir_path):
        os.makedirs(config.base.log_dir_path)

    logger = setup_logger(config.base.logger_name, config.base.log_file_path)
    logger.info('using config: {}'.format(config))

    config_copy_file_path = os.path.join(config.base.log_dir_path,
                                         'config.yaml')
    shutil.copy(args.config, config_copy_file_path)

    writer = SummaryWriter(log_dir=config.base.log_dir_path)

    if not os.path.exists(args.model):
        raise FileNotFoundError(
            'provided model directory does not exist: {}'.format(args.model))
    else:
        logger.info('using model directory: {}'.format(args.model))

    config.fine_tuning.model_path = args.model
    logger.info('using model_path: {}'.format(config.fine_tuning.model_path))

    config.fine_tuning.epoch_num = args.epoch_num
    logger.info('using epoch_num: {}'.format(config.fine_tuning.epoch_num))

    model_file_path = Path(config.fine_tuning.model_path).joinpath(
        'checkpoint_{}.pth'.format(config.fine_tuning.epoch_num))
    if not os.path.exists(model_file_path):
        raise FileNotFoundError(
            'model file does not exist: {}'.format(model_file_path))
    else:
        logger.info('using model file: {}'.format(model_file_path))

    train_dataset, val_dataset, test_dataset, classes = Datasets.get_datasets(
        config)
    num_classes = len(classes)

    train_loader, val_loader, test_loader = Datasets.get_loaders(
        config, train_dataset, val_dataset, test_dataset)

    dataloaders = {
        'train': train_loader,
        'val': val_loader,
    }

    dataset_sizes = {
        'train': len(train_loader.sampler),
        'val': len(val_loader.sampler)
    }

    simclr_model = load_model(config)
    logger.info('loaded simclr_model: {}'.format(
        config.fine_tuning.model_path))

    classification_model = to_classification_model(simclr_model, num_classes,
                                                   config)
    classification_model = classification_model.to(config.base.device)
    logger.info('created classification model from simclr model')

    criterion = torch.nn.CrossEntropyLoss()
    logger.info('created criterion')

    lr = config.fine_tuning.learning_rate
    momentum = config.fine_tuning.momentum
    optimizer_ft = torch.optim.SGD(classification_model.parameters(),
                                   lr=lr,
                                   momentum=momentum,
                                   nesterov=True)
    logger.info('created optimizer')

    step_size = config.fine_tuning.step_size
    gamma = config.fine_tuning.gamma
    exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_ft,
                                                       step_size=step_size,
                                                       gamma=gamma)
    logger.info('created learning rate scheduler')

    epochs = config.fine_tuning.epochs
    classification_model = train_model(classification_model, criterion,
                                       optimizer_ft, exp_lr_scheduler,
                                       dataloaders, dataset_sizes, config,
                                       epochs, writer)
    logger.info('completed model training')

    test_model(config, classification_model, test_loader)
    logger.info('completed model testing')

    trained_model_file_path = save_model(config, classification_model, epochs)
    logger.info('saved trained model: {}'.format(trained_model_file_path))
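
All three main(args) functions are driven by the same style of command-line arguments. A hypothetical entry point for the fine-tuning example (the flag names are assumptions inferred from the attribute accesses above, not the repository's actual CLI):

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', required=True, help='path to the YAML config file')
    parser.add_argument('--model', required=True, help='directory containing checkpoint_<epoch>.pth files')
    parser.add_argument('--epoch_num', required=True, help='epoch number of the checkpoint to load')
    main(parser.parse_args())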