Example #1
def main():
    # Create main logger
    logger = get_logger('Graph matching requester')

    parser = argparse.ArgumentParser(description='Graph matching')
    parser.add_argument('--config', type=str, help='Path to the YAML config file', default=CONFIG_PATH)
    args = parser.parse_args()

    # Load and log experiment configuration
    config = load_config(args.config)
    logger.info(config)

    manual_seed = config.get('manual_seed', None)
    if manual_seed is not None:
        logger.info(f'Seed the RNG for all devices with {manual_seed}')
        torch.manual_seed(manual_seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    # Create the model
    module_path = "models.model"
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(_get_model(module_path, config))
    else:
        model = _get_model(module_path, config)

    # put the model on GPUs
    logger.info(f"Sending the model to '{config['device']}', using {torch.cuda.device_count()} GPUs...")
    model = model.to(config['device'])

    # Create data loaders
    loaders = get_data_loaders(config)
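Note: the `_get_model` helper is not shown in this snippet. A minimal sketch of the usual dynamic-import pattern, assuming the config carries a 'model' section with a 'name' key:

import importlib

def _get_model(module_path, config):
    # Hypothetical sketch: look up the model class named in the config
    # inside the given module and instantiate it with the remaining kwargs.
    module = importlib.import_module(module_path)
    model_config = dict(config['model'])
    model_class = getattr(module, model_config.pop('name'))
    return model_class(**model_config)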
Example #2
def main():
    args = get_parser().parse_args()

    config = Config.from_file(args.config)

    logger = get_logger(config.output_path)
    logger.info(args)
    logger.info("=> Starting evaluation ...")

    logger.info("Load data")
    corpus = io.load_json(config.input_path, append_title=config.use_title)

    logger.info("Perform preprocessing")
    preprocessed_corpus = Preprocessing(
        corpus["keywords"],
        config=config.preprocessing,
        datatype="keywords",
        logger=logger,
    ).apply_preprocessing()

    preprocessed_corpus["token"] = preprocessed_corpus["token"].apply(flatten)
    preprocessed_corpus.drop("abstract", axis=1, inplace=True)

    logger.info("Start clustering")
    clustering = Clustering(
        preprocessed_corpus,
        clustering_config=config.clustering,
        dim_reduction_config=config.dim_reduction,
        logger=logger,
    )
    model = clustering.perform_clustering()

    logger.info(f"Save results to {config.output_path}")
    corpus["label"] = model.labels_
    io.write_json(config.input_path.split(".")[0] + "_labeled.json", corpus)
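Note: `flatten` is not defined in this snippet. A plausible one-level flattener, assuming each cell of the 'token' column holds a list of token lists:

def flatten(nested):
    # Hypothetical helper: collapse a list of lists into a single flat list.
    return [item for sublist in nested for item in sublist]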
Example #3
    def __init__(self,
                 model,
                 optimizer,
                 lr_scheduler,
                 loss_criterion,
                 eval_criterion,
                 device,
                 loaders,
                 checkpoint_dir,
                 max_num_epochs=1000,
                 max_num_iterations=None,
                 validate_after_iters=None,
                 log_after_iters=None,
                 validate_iters=None,
                 num_iterations=0,
                 num_epoch=0,
                 eval_score_higher_is_better=True,
                 best_eval_score=None,
                 logger=None,
                 inference_config=None):
        if logger is None:
            self.logger = get_logger('Trainer', level=logging.DEBUG)
        else:
            self.logger = logger
        self.plotter = VisdomLinePlotter('gcn')

        self.logger.info(model)
        self.model = model
        self.optimizer = optimizer
        self.scheduler = lr_scheduler
        self.loss_criterion = loss_criterion
        self.eval_criterion = eval_criterion
        self.device = device
        self.loaders = loaders
        self.checkpoint_dir = checkpoint_dir
        self.max_num_epochs = max_num_epochs
        self.max_num_iterations = max_num_iterations
        self.validate_after_iters = validate_after_iters
        self.log_after_iters = log_after_iters
        self.validate_iters = validate_iters
        self.eval_score_higher_is_better = eval_score_higher_is_better
        self.inference_config = inference_config
        self.logger.info(
            f'eval_score_higher_is_better: {eval_score_higher_is_better}')

        if best_eval_score is not None:
            self.best_eval_score = best_eval_score
        else:
            # initialize the best_eval_score
            if eval_score_higher_is_better:
                self.best_eval_score = float('-inf')
            else:
                self.best_eval_score = float('inf')

        self.writer = SummaryWriter(
            log_dir=os.path.join(checkpoint_dir, 'logs'))

        self.num_iterations = num_iterations
        self.num_epoch = num_epoch
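Note: a trainer with this signature is typically resumed from a saved checkpoint. A hedged sketch of such a restore (the checkpoint keys and layout here are assumptions, not this repo's format; `torch` and `os` are assumed imported at module level):

    @classmethod
    def from_checkpoint(cls, checkpoint_path, model, optimizer, lr_scheduler,
                        loss_criterion, eval_criterion, loaders, logger=None):
        # Hypothetical sketch: reload the counters and best score saved earlier
        # so training continues where it stopped.
        state = torch.load(checkpoint_path, map_location='cpu')
        model.load_state_dict(state['model_state_dict'])
        optimizer.load_state_dict(state['optimizer_state_dict'])
        return cls(model, optimizer, lr_scheduler, loss_criterion, eval_criterion,
                   state['device'], loaders, os.path.dirname(checkpoint_path),
                   num_iterations=state['num_iterations'],
                   num_epoch=state['num_epoch'],
                   best_eval_score=state['best_eval_score'],
                   logger=logger)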
Example #4
def main():
    # Create main logger
    logger = get_logger('GCN Trainer')

    parser = argparse.ArgumentParser(description='GCN training')
    parser.add_argument('--config', type=str, help='Path to the YAML config file', default=CONFIG_PATH)
    args = parser.parse_args()

    # Load and log experiment configuration
    config = load_config(args.config)
    logger.info(config)

    manual_seed = config.get('manual_seed', None)
    if manual_seed is not None:
        logger.info(f'Seed the RNG for all devices with {manual_seed}')
        torch.manual_seed(manual_seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    # Create the model
    module_path = "models.gcn.model"
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(_get_model(module_path, config))
    else:
        model = _get_model(module_path, config)

    # put the model on GPUs
    logger.info(f"Sending the model to '{config['device']}', using {torch.cuda.device_count()} GPUs...")
    model = model.to(config['device'])
    # weights initialization
    model.apply(weights_init)
    
    # Log the number of learnable parameters
    logger.info(f'Number of learnable params {get_number_of_learnable_parameters(model)}')

    # Create loss criterion
    loss_criterion = get_loss_criterion(config)
    # Create evaluation metric
    eval_criterion = get_evaluation_metric(config)

    # Create data loaders
    loaders = get_data_loaders(config)

    # Create the optimizer
    optimizer = _create_optimizer(config, model)

    # Create learning rate adjustment strategy
    lr_scheduler = _create_lr_scheduler(config, optimizer)

    # Create model trainer
    trainer = _create_trainer(config, model=model, optimizer=optimizer, lr_scheduler=lr_scheduler,
                              loss_criterion=loss_criterion, eval_criterion=eval_criterion, loaders=loaders,
                              logger=logger)
    # Start training
    trainer.fit()
    logger.info(f'Best evaluation score: {trainer.best_eval_score}')
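Note: `weights_init` is applied recursively by `model.apply`. A common Xavier-style initializer sketch (the exact scheme used here is an assumption):

import torch.nn as nn

def weights_init(m):
    # Hypothetical sketch: Xavier-initialize conv/linear weights, zero the biases.
    if isinstance(m, (nn.Conv2d, nn.Linear)):
        nn.init.xavier_uniform_(m.weight)
        if m.bias is not None:
            nn.init.zeros_(m.bias)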
Example #5
def get_test_loaders(config):
    """
    Returns a list of DataLoader, one per each test file.

    :param config: a top level configuration object containing the 'datasets' key
    :return: generator of DataLoader objects
    """

    def my_collate(batch):
        error_msg = "batch must contain tensors or slices; found {}"
        if isinstance(batch[0][0], torch.Tensor):
            out = batch[0][0]
            if isinstance(batch[0][2], torch.Tensor):
                out2 = (batch[0][2]).unsqueeze(0)
            else:
                out2 = None
            return out.unsqueeze(0), batch[0][1], out2, batch[0][3]
        raise TypeError(error_msg.format(type(batch[0][0])))

    logger = get_logger('TestDataset')

    assert 'loaders' in config, 'Could not find data loaders configuration'
    loaders_config = config['loaders']

    # get test files
    test_paths = loaders_config['test_path']
    assert isinstance(test_paths, list)
    # get test patch size and stride
    test_patch = tuple(loaders_config['test_patch'])
    test_stride = tuple(loaders_config['test_stride'])
    # get clip value
    clip_val = tuple(loaders_config['clip_val'])
    num_workers = loaders_config.get('num_workers', 1)

    slice_builder_str = loaders_config.get('slice_builder', 'SliceBuilder')
    logger.info(f'Slice builder class: {slice_builder_str}')
    slice_builder_cls = _get_slice_builder_cls(slice_builder_str)

    for test_path in test_paths:
        assert os.path.exists(test_path)
        try:
            logger.info(f'Loading testing set from: {test_path}...')
            with open(test_path) as f:
                for line in f:
                    name, file_path, label_path = line.split()[0:3]
                    logger.info(f'Create testing dataset from: {name}...')
                    test_dataset = NiftiDataset(file_path, test_patch, test_stride, phase='test',
                                                label_path=label_path, clip_val=clip_val,
                                                transformer_config=loaders_config['transformer'],
                                                slice_builder_cls=slice_builder_cls)
                    # use a generator in order to create data loaders lazily, one by one
                    yield DataLoader(test_dataset, batch_size=1, num_workers=num_workers, collate_fn=my_collate)
        except Exception:
            logger.info(f'Skipping testing set: {test_path}', exc_info=True)
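Note: because `get_test_loaders` yields loaders one by one, each dataset is only opened when consumed. A typical consumption pattern, matching the 4-tuple returned by `my_collate` (the `model` name and device lookup are assumptions):

for test_loader in get_test_loaders(config):
    for img, slice_index, label, name in test_loader:
        # each batch is a single patch; run inference patch by patch
        prediction = model(img.to(config['device']))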
Example #6
def main():
    # Create main logger
    logger = get_logger('UNet3DTrainer')

    parser = argparse.ArgumentParser(description='UNet3D training')
    parser.add_argument('--config', type=str, help='Path to the YAML config file', default='/home/SENSETIME/shenrui/Dropbox/SenseTime/edgeDL/resources/train_config_unet.yaml')
    args = parser.parse_args()

    # Load and log experiment configuration
    config = load_config(args.config)
    logger.info(config)

    manual_seed = config.get('manual_seed', None)
    if manual_seed is not None:
        logger.info(f'Seed the RNG for all devices with {manual_seed}')
        torch.manual_seed(manual_seed)
        # see https://pytorch.org/docs/stable/notes/randomness.html
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    # Create the model
    model = get_model(config)
    # put the model on GPUs
    logger.info(f"Sending the model to '{config['device']}'")
    model = model.to(config['device'])
    # Log the number of learnable parameters
    logger.info(f'Number of learnable params {get_number_of_learnable_parameters(model)}')

    # Create loss criterion
    loss_criterion = get_loss_criterion(config)
    # Create evaluation metric
    eval_criterion = get_evaluation_metric(config)

    # Create data loaders
    loaders = get_train_loaders(config)

    # Create the optimizer
    optimizer = _create_optimizer(config, model)

    # Create learning rate adjustment strategy
    lr_scheduler = _create_lr_scheduler(config, optimizer)

    # Create model trainer
    trainer = _create_trainer(config, model=model, optimizer=optimizer, lr_scheduler=lr_scheduler,
                              loss_criterion=loss_criterion, eval_criterion=eval_criterion, loaders=loaders,
                              logger=logger)
    # Start training
    trainer.fit()
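Note: `_create_optimizer` is defined elsewhere. A minimal sketch, assuming an 'optimizer' section in the config with a 'learning_rate' key:

import torch.optim as optim

def _create_optimizer(config, model):
    # Hypothetical sketch: build an Adam optimizer from the config section.
    optimizer_config = config['optimizer']
    return optim.Adam(model.parameters(),
                      lr=optimizer_config['learning_rate'],
                      weight_decay=optimizer_config.get('weight_decay', 0))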
Example #7
def main():
    # Create main logger
    logger = get_logger('CASENetPredictor')

    parser = argparse.ArgumentParser(description='CASENet2D testing')
    parser.add_argument('--config', type=str, help='Path to the YAML config file',
                        default='/home/SENSETIME/shenrui/Dropbox/SenseTime/edgeDL/resources/test_config_backup.yaml')
    args = parser.parse_args()

    # Load and log experiment configuration
    config = load_config(args.config)
    logger.info(config)

    # Create the model
    model = get_model(config)

    # Load model state
    model_path = config['model_path']
    logger.info(f'Loading model from {model_path}...')
    load_checkpoint(model_path, model)
    logger.info(f"Sending the model to '{config['device']}'")
    model = model.to(config['device'])
    folderpath = config['save_path']
    logger.info(f'Destination of predictions is {folderpath}...')

    logger.info('Loading datasets...')

    eval_score_avg = 0
    count = 0
    for test_loader in get_test_loaders(config):
        logger.info(f"Processing '{test_loader.dataset.file_path}'...")

        output_file = _get_output_file(test_loader.dataset,
                                       folderpath=folderpath)
        # run the model prediction on the entire dataset and save to nifti image
        eval_score, num = predict(model, test_loader, output_file, config,
                                  logger)
        eval_score_avg = (eval_score_avg * count + eval_score * num) / (count + num)
        count += num
        logger.info(
            f'Testing finished. Average evaluation score: {eval_score}. Saving predictions to: {output_file}...'
        )
        logger.info(f'Total average evaluation score: {eval_score_avg}')
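Note: `_get_output_file` is not shown. A plausible sketch that derives the prediction path from the dataset's source file (the naming convention is an assumption):

import os

def _get_output_file(dataset, folderpath):
    # Hypothetical sketch: '<folderpath>/<input basename>_predictions.nii.gz'
    basename = os.path.basename(dataset.file_path).split('.')[0]
    return os.path.join(folderpath, f'{basename}_predictions.nii.gz')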
Example #8
import importlib

import numpy as np
import torch
import torch.nn.functional as F
from skimage import measure

from models.casenet2d.losses import compute_per_channel_dice, expand_as_one_hot
from utils.helper import get_logger, adapted_rand

LOGGER = get_logger('EvalMetric')

SUPPORTED_METRICS = [
    'dice', 'iou', 'boundary_ap', 'dt_ap', 'quantized_dt_ap', 'angle',
    'inverse_angular'
]


class DiceCoefficient:
    """Computes Dice Coefficient.
    Generalized to multiple channels by computing per-channel Dice Score
    (as described in https://arxiv.org/pdf/1707.03237.pdf) and then simply taking the average.
    Input is expected to be probabilities instead of logits.
    This metric is mostly useful when channels contain the same semantic class (e.g. affinities computed with different offsets).
    DO NOT USE this metric when training with DiceLoss, otherwise the results will be biased towards the loss.
    """
    def __init__(self,
                 skip_channels=(),
                 epsilon=1e-5,
                 ignore_index=None,
                 **kwargs):
        self.epsilon = epsilon
        self.ignore_index = ignore_index
        self.skip_channels = skip_channels
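Note: the per-channel Dice described in the docstring comes from `compute_per_channel_dice` in the losses module. A hedged sketch of the standard formulation (not necessarily this repo's exact code):

def compute_per_channel_dice(input, target, epsilon=1e-5):
    # Sketch: Dice = 2*|A∩B| / (|A| + |B|), computed per channel
    # over tensors of shape (N, C, spatial...).
    input = input.flatten(2).float()
    target = target.flatten(2).float()
    intersect = (input * target).sum(-1).sum(0)
    denominator = input.sum(-1).sum(0) + target.sum(-1).sum(0)
    return 2 * intersect / denominator.clamp(min=epsilon)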
Example #9
import importlib

import numpy as np
import torch
import torch.nn.functional as F
from skimage import measure

from models.casenet3d.losses import compute_per_channel_dice, expand_as_one_hot
from utils.helper import get_logger, adapted_rand
import warnings
warnings.filterwarnings("ignore")

logger = get_logger('EvalMetric')

SUPPORTED_METRICS = ['DiceCoefficient', 'MeanIoU', 'PrecisionStats', 'STEALEdgeLoss']


class DiceCoefficient:
    """Computes Dice Coefficient.
    Generalized to multiple channels by computing per-channel Dice Score
    (as described in https://arxiv.org/pdf/1707.03237.pdf) and then simply taking the average.
    Input is expected to be probabilities instead of logits.
    This metric is mostly useful when channels contain the same semantic class (e.g. affinities computed with different offsets).
    DO NOT USE this metric when training with DiceLoss, otherwise the results will be biased towards the loss.
    """

    def __init__(self, skip_channels=(), epsilon=1e-10, ignore_index=None, **kwargs):
        self.epsilon = epsilon
        self.ignore_index = ignore_index
        self.skip_channels = skip_channels
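Note: the snippet cuts off after __init__. A hedged sketch of the __call__ that would average the per-channel scores (the handling of ignore_index is omitted here):

    def __call__(self, input, target):
        # Hypothetical sketch: average per-channel Dice, dropping skipped channels.
        per_channel = compute_per_channel_dice(input, target, epsilon=self.epsilon)
        kept = [score for i, score in enumerate(per_channel)
                if i not in self.skip_channels]
        return torch.mean(torch.stack(kept))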
Example #10
def get_train_loaders(config):
    """
    Returns dictionary containing the training and validation loaders

    :param config: a top level configuration object containing the 'loaders' key
    :return: dict {
        'train': <train_loader>
        'val': <val_loader>
    }
    """
    assert 'loaders' in config, 'Could not find data loaders configuration'
    loaders_config = config['loaders']

    logger = get_logger('TrainDataset')
    logger.info('Creating training and validation set loaders...')

    # get train and validation files
    train_paths = loaders_config['train_path']
    val_paths = loaders_config['val_path']
    assert isinstance(train_paths, list)
    assert isinstance(val_paths, list)

    # get train/validation patch size and stride
    train_patch = tuple(loaders_config['train_patch'])
    train_stride = tuple(loaders_config['train_stride'])
    val_patch = tuple(loaders_config['val_patch'])
    val_stride = tuple(loaders_config['val_stride'])
    # get clip value
    clip_val = tuple(loaders_config['clip_val'])

    slice_builder_str = loaders_config.get('slice_builder', 'SliceBuilder')
    logger.info(f'Slice builder class: {slice_builder_str}')
    slice_builder_cls = _get_slice_builder_cls(slice_builder_str)

    # create nifti backed training and validation dataset with data augmentation
    train_datasets = []
    for train_path in train_paths:
        assert os.path.exists(train_path)
        try:
            logger.info(f'Loading training set from: {train_path}...')
            with open(train_path) as f:
                for line in f:
                    name, file_path, label_path = line.split()[0:3]
                    logger.info(f'Create training dataset from: {name}...')
                    train_dataset = NiftiDataset(file_path, train_patch, train_stride, phase='train',
                                                 label_path=label_path, clip_val=clip_val,
                                                 transformer_config=loaders_config['transformer'],
                                                 slice_builder_cls=slice_builder_cls)
                    train_datasets.append(train_dataset)
        except Exception:
            logger.info(f'Skipping training set: {train_path}', exc_info=True)

    val_datasets = []
    for val_path in val_paths:
        assert os.path.exists(val_path)
        try:
            logger.info(f'Loading validation set from: {val_path}...')
            with open(val_path) as f:
                for line in f:
                    name, file_path, label_path = line.split()[0:3]
                    logger.info(f'Create validation dataset from: {name}...')
                    val_dataset = NiftiDataset(file_path, val_patch, val_stride, phase='val',
                                               label_path=label_path, clip_val=clip_val,
                                               transformer_config=loaders_config['transformer'],
                                               slice_builder_cls=slice_builder_cls)
                    val_datasets.append(val_dataset)
        except Exception:
            logger.info(f'Skipping validation set: {val_path}', exc_info=True)

    num_workers = loaders_config.get('num_workers', 1)
    batch_size = loaders_config.get('batch_size', 1)
    logger.info(f'Number of workers for train/val datasets: {num_workers}')
    # when training with volumetric data use batch_size of 1 due to GPU memory constraints
    return {
        'train': DataLoader(ConcatDataset(train_datasets), batch_size=batch_size, shuffle=True, num_workers=num_workers, pin_memory=True),
        'val': DataLoader(ConcatDataset(val_datasets), batch_size=batch_size, shuffle=True, num_workers=num_workers, pin_memory=True)
    }
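Note: `_get_slice_builder_cls` resolves the class named in the config. A minimal sketch of the usual name-to-class lookup (the module path is an assumption):

import importlib

def _get_slice_builder_cls(class_name):
    # Hypothetical sketch: fetch the slice builder class by name.
    module = importlib.import_module('datasets.nifti_dataset')  # assumed module
    return getattr(module, class_name)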
Example #11
def main():
    # Create main logger
    logger = get_logger('CASENetTrainer')

    parser = argparse.ArgumentParser(description='CASENet training')
    parser.add_argument('--config', type=str, help='Path to the YAML config file', default='/home/SENSETIME/shenrui/Dropbox/SenseTime/edgeDL/resources/train_config_backup.yaml')
    args = parser.parse_args()

    # Load and log experiment configuration
    config = load_config(args.config)
    logger.info(config)

    manual_seed = config.get('manual_seed', None)
    if manual_seed is not None:
        logger.info(f'Seed the RNG for all devices with {manual_seed}')
        torch.manual_seed(manual_seed)
        # see https://pytorch.org/docs/stable/notes/randomness.html
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    dim = config.get('dim', None)
    if dim == 2:
        from models.casenet2d.losses import get_loss_criterion
        from models.casenet2d.metrics import get_evaluation_metric
        module_path = 'models.casenet2d.model'
    elif dim == 3:
        from models.casenet3d.losses import get_loss_criterion
        from models.casenet3d.metrics import get_evaluation_metric
        module_path = 'models.casenet3d.model'
    else:
        raise ValueError(f"Unsupported dimensions '{dim}'")

    # Create the model
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(_get_model(module_path, config))
    else:
        model = _get_model(module_path, config)
    # put the model on GPUs
    logger.info(f"Sending the model to '{config['device']}', using {torch.cuda.device_count()} GPUs...")
    model = model.to(config['device'])
    # Log the number of learnable parameters
    logger.info(f'Number of learnable params {get_number_of_learnable_parameters(model)}')

    # Create loss criterion
    loss_criterion = get_loss_criterion(config)
    # Create evaluation metric
    eval_criterion = get_evaluation_metric(config)

    # Create data loaders
    loaders = get_train_loaders(config)

    # Create the optimizer
    optimizer = _create_optimizer(config, model)

    # Create learning rate adjustment strategy
    lr_scheduler = _create_lr_scheduler(config, optimizer)

    # Create model trainer
    trainer = _create_trainer(config, model=model, optimizer=optimizer, lr_scheduler=lr_scheduler,
                              loss_criterion=loss_criterion, eval_criterion=eval_criterion, loaders=loaders,
                              logger=logger)
    # Start training
    trainer.fit()
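Note: `_create_lr_scheduler` is defined elsewhere. A minimal sketch, assuming an optional 'lr_scheduler' config section whose 'name' matches a class in torch.optim.lr_scheduler:

from torch.optim import lr_scheduler as schedulers

def _create_lr_scheduler(config, optimizer):
    # Hypothetical sketch: instantiate the named scheduler with its config kwargs,
    # falling back to MultiStepLR when the section is absent.
    scheduler_config = dict(config.get('lr_scheduler',
                                       {'name': 'MultiStepLR', 'milestones': [30, 80]}))
    scheduler_class = getattr(schedulers, scheduler_config.pop('name'))
    return scheduler_class(optimizer, **scheduler_config)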