Example #1
def main():
    """ Main method. """
    args = PARSER.parse_known_args()[0]

    # sets up the backend for distributed training (optional)
    device, local_rank = setup(distributed=args.distributed)

    # retrieve the dataloaders for the chosen dataset
    dataloaders, args = get_dataloaders(args)

    # make dirs for current experiment logs, summaries etc
    args = experiment_config(args)

    # initialise the model
    model = resnet.resnet20(args)

    # place model onto GPU(s)
    if args.distributed:
        torch.cuda.set_device(device)
        torch.set_num_threads(5)  # n cpu threads / n processes per node
        model = DistributedDataParallel(model.cuda(),
                                        device_ids=[local_rank],
                                        output_device=local_rank)
        # only print stuff from process (rank) 0
        args.print_progress = int(os.environ.get('RANK', '0')) == 0
    else:
        if args.half_precision:
            model.half()  # convert to half precision
            for layer in model.modules():
                # keep batchnorm in 32 for convergence reasons
                if isinstance(layer, nn.BatchNorm2d):
                    layer.float()

        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
        print('\nUsing', torch.cuda.device_count(), 'GPU(s).\n')
        model.to(device)
        args.print_progress = True

    if args.print_progress:
        print_network(model, args)  # prints out the network architecture etc
        logging.info('\ntrain: {} - valid: {} - test: {}'.format(
            len(dataloaders['train'].dataset),
            len(dataloaders['valid'].dataset),
            len(dataloaders['test'].dataset)))

    # launch model training or inference
    if not args.inference:
        train(model, dataloaders, args)

        if args.distributed:  # cleanup
            torch.distributed.destroy_process_group()
    else:
        model.load_state_dict(torch.load(args.load_checkpoint_dir))
        test_loss, test_acc = evaluate(model, args, dataloaders['test'])
        print('[Test] loss {:.4f} - acc {:.4f} - acc_topk {:.4f}'.format(
            test_loss, test_acc[0], test_acc[1]))
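Every example on this page calls get_dataloaders(args) and then indexes the result with ['train'], ['valid'] and ['test'], so the function is assumed to return a dict of torch DataLoaders plus the (possibly updated) args. A minimal sketch of that contract, with random tensors standing in for a real dataset and with batch_size, num_workers and n_classes treated as assumed attributes of args:

import torch
from torch.utils.data import DataLoader, TensorDataset

def get_dataloaders(args):
    """Return ({'train': ..., 'valid': ..., 'test': ...}, args) -- sketch only."""
    loaders = {}
    for split in ('train', 'valid', 'test'):
        data = TensorDataset(torch.randn(64, 3, 32, 32),
                             torch.randint(0, args.n_classes, (64,)))
        loaders[split] = DataLoader(data,
                                    batch_size=args.batch_size,
                                    shuffle=(split == 'train'),
                                    num_workers=args.num_workers)
    return loaders, args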
Example #2
File: train.py  Project: leibo-cmu/MatSeg
def train(args):
    Arguments.save_args(args, args.args_path)
    train_loader, val_loader, _ = get_dataloaders(args)
    model = UNetVgg16(n_classes=args.n_classes).to(args.device)
    optimizer = get_optimizer(args.optimizer, model)
    lr_scheduler = LRScheduler(args.lr_scheduler, optimizer)
    criterion = get_loss_fn(args.loss_type, args.ignore_index).to(args.device)
    model_saver = ModelSaver(args.model_path)
    recorder = Recorder(['train_miou', 'train_acc', 'train_loss',
                         'val_miou', 'val_acc', 'val_loss'])
    for epoch in range(args.n_epochs):
        print(f"{args.experim_name} Epoch {epoch+1}:")
        train_loss, train_acc, train_miou, train_ious = train_epoch(
            model=model,
            dataloader=train_loader,
            n_classes=args.n_classes,
            optimizer=optimizer,
            lr_scheduler=lr_scheduler,
            criterion=criterion,
            device=args.device,
        )
        print(f"train | mIoU: {train_miou:.3f} | accuracy: {train_acc:.3f} | loss: {train_loss:.3f}")
        val_loss, val_scores = eval_epoch(
            model=model,
            dataloader=val_loader,
            n_classes=args.n_classes,
            criterion=criterion,
            device=args.device,
        )
        val_miou, val_ious, val_acc = val_scores['mIoU'], val_scores['IoUs'], val_scores['accuracy']
        print(f"valid | mIoU: {val_miou:.3f} | accuracy: {val_acc:.3f} | loss: {val_loss:.3f}")
        recorder.update([train_miou, train_acc, train_loss, val_miou, val_acc, val_loss])
        recorder.save(args.record_path)
        if args.metric.startswith("IoU"):
            metric = val_ious[int(args.metric.split('_')[1])]
        else:
            metric = val_miou
        model_saver.save_models(metric, epoch+1, model,
                                ious={'train': train_ious, 'val': val_ious})

    print(f"best model at epoch {model_saver.best_epoch} with miou {model_saver.best_score:.5f}")
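Example #2 depends on a ModelSaver that remembers the best validation metric and exposes best_epoch and best_score, and Example #3 later reads the checkpoint through its 'model_state_dict' key. The MatSeg implementation is not shown on this page; a minimal sketch consistent with those call sites (the extra checkpoint keys are assumptions):

import torch

class ModelSaver:
    """Keep only the checkpoint with the best validation metric (illustrative sketch)."""
    def __init__(self, model_path):
        self.model_path = model_path
        self.best_score = float('-inf')
        self.best_epoch = 0

    def save_models(self, metric, epoch, model, ious=None):
        if metric > self.best_score:
            self.best_score = metric
            self.best_epoch = epoch
            torch.save({'model_state_dict': model.state_dict(),
                        'epoch': epoch,
                        'ious': ious}, self.model_path)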
Example #3
File: eval.py  Project: leibo-cmu/MatSeg
def evaluate(args, mode, save_pred=False):
    _, val_loader, test_loader = get_dataloaders(args)
    if mode == 'val':
        dataloader = val_loader
    elif mode == 'test':
        dataloader = test_loader
    else:
        raise ValueError(f"{mode} not supported. Choose from 'val' or 'test'")
    model = UNetVgg16(n_classes=args.n_classes).to(args.device)
    model.load_state_dict(torch.load(args.model_path)['model_state_dict'],
                          strict=False)
    criterion = get_loss_fn(args.loss_type, args.ignore_index).to(args.device)
    eval_loss, scores = eval_epoch(model=model,
                                   dataloader=dataloader,
                                   n_classes=args.n_classes,
                                   criterion=criterion,
                                   device=args.device,
                                   pred_dir=save_pred and args.pred_dir)
    miou, acc = scores['mIoU'], scores['accuracy']
    print(
        f"{mode} | mIoU: {miou:.3f} | accuracy: {acc:.3f} | loss: {eval_loss:.3f}"
    )
    return scores
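A hypothetical call to the evaluate function above, assuming args already carries model_path, device and the dataloader settings (the values below are illustrative only):

scores = evaluate(args, mode='test', save_pred=True)  # also writes predictions to args.pred_dir
print(scores['mIoU'], scores['IoUs'])                 # overall and per-class IoU from eval_epoch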
Example #4
import sys
import torch
from torch import nn

import argparse

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

if __name__ == '__main__':

    parser = argparse.ArgumentParser('nn models for inverse design')
    parser.add_argument('--model', type=str, default='inn')
    args = parser.parse_args()

    train_loader, val_loader, test_loader = get_dataloaders(args.model)
    configs = get_configs(args.model)

    if args.model in ['forward_model', 'inverse_model']:
        model = MLP(configs['input_dim'], configs['output_dim']).to(DEVICE)
        optimizer = torch.optim.Adam(model.parameters(), lr=configs['learning_rate'], weight_decay=configs['weight_decay'])
    
    elif args.model in ['tandem_net']:

        forward_model = MLP(4, 3).to(DEVICE)
        forward_model.load_state_dict(torch.load('./models/forward_model.pth')['model_state_dict'])
        inverse_model = MLP(3, 4).to(DEVICE)
        inverse_model.load_state_dict(torch.load('./models/inverse_model.pth')['model_state_dict'])
        model = TandemNet(forward_model, inverse_model)
        optimizer = torch.optim.Adam(model.inverse_model.parameters(), lr=configs['learning_rate'], weight_decay=configs['weight_decay'])
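This excerpt omits its imports (MLP, TandemNet, get_dataloaders and get_configs are presumably the project's own modules) and ends after the optimizer is built. For orientation, one plausible shape for get_configs, matching the keys read above and the MLP(4, 3) / MLP(3, 4) dimensions of the tandem branch, might be the following; the learning rates and weight decays are made-up values:

def get_configs(model_name):
    """Per-model hyperparameters (illustrative guess, not the original project's values)."""
    configs = {
        'forward_model': {'input_dim': 4, 'output_dim': 3,
                          'learning_rate': 1e-3, 'weight_decay': 1e-5},
        'inverse_model': {'input_dim': 3, 'output_dim': 4,
                          'learning_rate': 1e-3, 'weight_decay': 1e-5},
        'tandem_net':    {'learning_rate': 1e-3, 'weight_decay': 1e-5},
    }
    return configs[model_name]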
        
Example #5
from datasets import get_dataloaders
from cfg import Config

cfg = Config()
train_loader, val_loader, test_loader = get_dataloaders(cfg)
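Example #5 passes a Config object instead of parsed command-line args, so whatever attributes its get_dataloaders reads must live on that object. A minimal illustrative stand-in (the field names are guesses, not the project's cfg.Config):

from dataclasses import dataclass

@dataclass
class Config:
    data_dir: str = './data'
    batch_size: int = 32
    num_workers: int = 4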
Example #6
def main():
    """ Main """

    # Arguments
    args = parser.parse_args()

    # Setup Distributed Training
    device, local_rank = setup(distributed=args.distributed)

    # Get Dataloaders for Dataset of choice
    dataloaders, args = get_dataloaders(args)

    # Setup logging, saving models, summaries
    args = experiment_config(parser, args)

    # Get available models from /model/network.py
    model_names = sorted(name for name in models.__dict__
                         if name.islower() and not name.startswith("__")
                         and callable(models.__dict__[name]))

    # If model exists
    if any(args.model in model_name for model_name in model_names):

        # Load model
        base_encoder = getattr(models, args.model)(
            args, num_classes=args.n_classes)  # Encoder

        proj_head = models.projection_MLP(args)
        sup_head = models.Sup_Head(args)

    else:
        raise NotImplementedError("Model Not Implemented: {}".format(
            args.model))

    # Remove last FC layer from resnet
    base_encoder.fc = nn.Sequential()

    # Place model onto GPU(s)
    if args.distributed:
        torch.cuda.set_device(device)
        torch.set_num_threads(6)  # n cpu threads / n processes per node

        base_encoder = DistributedDataParallel(base_encoder.cuda(),
                                               device_ids=[local_rank],
                                               output_device=local_rank,
                                               find_unused_parameters=True,
                                               broadcast_buffers=False)
        proj_head = DistributedDataParallel(proj_head.cuda(),
                                            device_ids=[local_rank],
                                            output_device=local_rank,
                                            find_unused_parameters=True,
                                            broadcast_buffers=False)

        sup_head = DistributedDataParallel(sup_head.cuda(),
                                           device_ids=[local_rank],
                                           output_device=local_rank,
                                           find_unused_parameters=True,
                                           broadcast_buffers=False)

        # Only print from process (rank) 0
        args.print_progress = int(os.environ.get('RANK', '0')) == 0
    else:
        # If not distributed, use DataParallel
        if torch.cuda.device_count() > 1:
            base_encoder = nn.DataParallel(base_encoder)
            proj_head = nn.DataParallel(proj_head)
            sup_head = nn.DataParallel(sup_head)

        print('\nUsing', torch.cuda.device_count(), 'GPU(s).\n')

        base_encoder.to(device)
        proj_head.to(device)
        sup_head.to(device)

        args.print_progress = True

    # Print Network Structure and Params
    if args.print_progress:
        print_network(base_encoder,
                      args)  # prints out the network architecture etc
        logging.info('\npretrain/train: {} - valid: {} - test: {}'.format(
            len(dataloaders['train'].dataset),
            len(dataloaders['valid'].dataset),
            len(dataloaders['test'].dataset)))

    # launch model training or inference
    if not args.finetune:
        ''' Pretraining / Finetuning / Evaluate '''

        if not args.supervised:
            # Pretrain the encoder and projection head
            proj_head.apply(init_weights)

            pretrain(base_encoder, proj_head, dataloaders, args)
        else:
            supervised(base_encoder, sup_head, dataloaders, args)

        print("\n\nLoading the model: {}\n\n".format(args.load_checkpoint_dir))

        # Load the pretrained model
        checkpoint = torch.load(args.load_checkpoint_dir)

        # Load the encoder parameters
        base_encoder.load_state_dict(checkpoint['encoder'])

        # Initialize weights of the supervised / classification head
        sup_head.apply(init_weights)

        # Supervised Finetuning of the supervised classification head
        finetune(base_encoder, sup_head, dataloaders, args)

        # Evaluate the pretrained model and trained supervised head
        test_loss, test_acc, test_acc_top5 = evaluate(base_encoder, sup_head,
                                                      dataloaders, 'test',
                                                      args.finetune_epochs,
                                                      args)

        print('[Test] loss {:.4f} - acc {:.4f} - acc_top5 {:.4f}'.format(
            test_loss, test_acc, test_acc_top5))

        if args.distributed:  # cleanup
            torch.distributed.destroy_process_group()
    else:
        ''' Finetuning / Evaluate '''

        # Do not Pretrain, just finetune and inference

        print("\n\nLoading the model: {}\n\n".format(args.load_checkpoint_dir))

        # Load the pretrained model
        checkpoint = torch.load(args.load_checkpoint_dir)

        # Load the encoder parameters
        base_encoder.load_state_dict(checkpoint['encoder'])  # .cuda()

        # Initialize weights of the supervised / classification head
        sup_head.apply(init_weights)

        # Supervised Finetuning of the supervised classification head
        finetune(base_encoder, sup_head, dataloaders, args)

        # Evaluate the pretrained model and trained supervised head
        test_loss, test_acc, test_acc_top5 = evaluate(base_encoder, sup_head,
                                                      dataloaders, 'test',
                                                      args.finetune_epochs,
                                                      args)

        print('[Test] loss {:.4f} - acc {:.4f} - acc_top5 {:.4f}'.format(
            test_loss, test_acc, test_acc_top5))

        if args.distributed:  # cleanup
            torch.distributed.destroy_process_group()
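Examples #1, #6, #9 and #10 all obtain (device, local_rank) from a setup(distributed=...) helper before wrapping modules in DistributedDataParallel. The helper itself is not shown on this page; a common pattern it likely follows, using the environment variables set by torch.distributed launchers, is sketched below:

import os
import torch
import torch.distributed as dist

def setup(distributed=False):
    """Initialise the process group and return (device, local_rank) -- sketch only."""
    if distributed:
        local_rank = int(os.environ.get('LOCAL_RANK', 0))
        dist.init_process_group(backend='nccl', init_method='env://')
        device = torch.device('cuda', local_rank)
    else:
        local_rank = 0
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    return device, local_rank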
Example #7
if __name__ == "__main__":
    # parse args
    parser = util.get_train_parser()

    args = parser.parse_args()

    if args.seed is not None:
        use_cuda = args.device == 'cuda'
        util.random_seed(seed_value=args.seed, use_cuda=use_cuda)

    # load in data generator
    dataloaders = datasets.get_dataloaders(args.country, args.dataset, args)
    # load in model
    model = models.get_model(**vars(args))
    if args.model_name in DL_MODELS:
        print('Total trainable model parameters: {}'.format(sum(p.numel() for p in model.parameters() if p.requires_grad)))

    if args.model_path is not None:
        model.load_state_dict(torch.load(args.model_path))

    if args.model_name in DL_MODELS and args.device == 'cuda' and torch.cuda.is_available():
        model.to(args.device)

    if args.name is None:
        args.name = str(datetime.datetime.now()) + "_" + args.model_name

    
Example #8
        hps[hp] = []

    experiments = {}

    # for some number of iterations
    for sample_no in range(search_range.num_samples):
        # build argparse args by parsing args and then setting empty fields to specified ones above
        train_parser = util.get_train_parser()
        train_args = train_parser.parse_args([
            '--model_name', search_range.model_name, '--dataset',
            search_range.dataset, '--env_name', search_range.env_name,
            '--country', search_range.country
        ])
        generate_hps(train_args, search_range)
        train_args.epochs = search_range.epochs
        dataloaders = datasets.get_dataloaders(train_args.country,
                                               train_args.dataset, train_args)

        model = models.get_model(**vars(train_args))
        model.to(train_args.device)
        experiment_name = f"model:{train_args.model_name}_dataset:{train_args.dataset}_epochs:{search_range.epochs}_sample_no:{sample_no}"

        train_args.name = experiment_name
        print("=" * 100)
        print(f"TRAINING: {experiment_name}")
        for hp in hps:
            print(hp, train_args.__dict__[hp])
        try:
            train.train(model,
                        train_args.model_name,
                        train_args,
                        dataloaders=dataloaders)
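The loop above fills the hyperparameter fields left empty by the argument parser through generate_hps(train_args, search_range). That helper is project code not shown here; a sketch of the usual random-search pattern, assuming search_range exposes a dict of candidate values per hyperparameter (an assumption, since only num_samples, epochs and a few fixed fields appear above), would be:

import random

def generate_hps(train_args, search_range):
    """Sample one candidate value per hyperparameter and write it onto train_args (illustrative sketch)."""
    for hp, candidates in search_range.hp_ranges.items():  # hp_ranges: assumed dict of candidate lists
        setattr(train_args, hp, random.choice(candidates))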
Example #9
def main():
    """ Main """

    # Arguments
    args = parser.parse_args()

    # Setup Distributed Training
    device, local_rank = setup(distributed=args.distributed)

    # Get Dataloaders for Dataset of choice
    dataloaders, args = get_dataloaders(args)

    # Setup logging, saving models, summaries
    args = experiment_config(parser, args)
    ''' Base Encoder '''

    # Get available models from /model/network.py
    model_names = sorted(name for name in models.__dict__
                         if name.islower() and not name.startswith("__")
                         and callable(models.__dict__[name]))

    # If model exists
    if any(args.model in model_name for model_name in model_names):
        # Load model
        base_encoder = getattr(models, args.model)(
            args, num_classes=args.n_classes)  # Encoder

    else:
        raise NotImplementedError("Model Not Implemented: {}".format(
            args.model))

    if not args.supervised:
        # freeze all layers but the last fc
        for name, param in base_encoder.named_parameters():
            if name not in ['fc.weight', 'fc.bias']:
                param.requires_grad = False
        # init the fc layer
        init_weights(base_encoder)
    ''' MoCo Model '''
    moco = MoCo_Model(args,
                      queue_size=args.queue_size,
                      momentum=args.queue_momentum,
                      temperature=args.temperature)

    # Place model onto GPU(s)
    if args.distributed:
        torch.cuda.set_device(device)
        torch.set_num_threads(6)  # n cpu threads / n processes per node

        moco = DistributedDataParallel(moco.cuda(),
                                       device_ids=[local_rank],
                                       output_device=local_rank,
                                       find_unused_parameters=True,
                                       broadcast_buffers=False)
        base_encoder = DistributedDataParallel(base_encoder.cuda(),
                                               device_ids=[local_rank],
                                               output_device=local_rank,
                                               find_unused_parameters=True,
                                               broadcast_buffers=False)

        # Only print from process (rank) 0
        args.print_progress = int(os.environ.get('RANK', '0')) == 0
    else:
        # If not distributed, use DataParallel
        if torch.cuda.device_count() > 1:
            moco = nn.DataParallel(moco)
            base_encoder = nn.DataParallel(base_encoder)

        print('\nUsing', torch.cuda.device_count(), 'GPU(s).\n')

        moco.to(device)
        base_encoder.to(device)

        args.print_progress = True

    # Print Network Structure and Params
    if args.print_progress:
        print_network(moco, args)  # prints out the network architecture etc
        logging.info('\npretrain/train: {} - valid: {} - test: {}'.format(
            len(dataloaders['train'].dataset),
            len(dataloaders['valid'].dataset),
            len(dataloaders['test'].dataset)))

    # launch model training or inference
    if not args.finetune:
        ''' Pretraining / Finetuning / Evaluate '''

        if not args.supervised:
            # Pretrain the encoder and projection head
            pretrain(moco, dataloaders, args)

            # Load the state_dict from query encoder and load it on finetune net
            base_encoder = load_moco(base_encoder, args)

        else:
            supervised(base_encoder, dataloaders, args)

            # Load the state_dict from query encoder and load it on finetune net
            base_encoder = load_sup(base_encoder, args)

        # Supervised Finetuning of the supervised classification head
        finetune(base_encoder, dataloaders, args)

        # Evaluate the pretrained model and trained supervised head
        test_loss, test_acc, test_acc_top5 = evaluate(base_encoder,
                                                      dataloaders, 'test',
                                                      args.finetune_epochs,
                                                      args)

        print('[Test] loss {:.4f} - acc {:.4f} - acc_top5 {:.4f}'.format(
            test_loss, test_acc, test_acc_top5))

        if args.distributed:  # cleanup
            torch.distributed.destroy_process_group()
    else:
        ''' Finetuning / Evaluate '''

        # Do not Pretrain, just finetune and inference
        # Load the state_dict from query encoder and load it on finetune net
        base_encoder = load_moco(base_encoder, args)

        # Supervised Finetuning of the supervised classification head
        finetune(base_encoder, dataloaders, args)

        # Evaluate the pretrained model and trained supervised head
        test_loss, test_acc, test_acc_top5 = evaluate(base_encoder,
                                                      dataloaders, 'test',
                                                      args.finetune_epochs,
                                                      args)

        print('[Test] loss {:.4f} - acc {:.4f} - acc_top5 {:.4f}'.format(
            test_loss, test_acc, test_acc_top5))

        if args.distributed:  # cleanup
            torch.distributed.destroy_process_group()
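load_moco copies the pretrained query-encoder weights out of a MoCo checkpoint and into base_encoder for finetuning, as the comments above describe. The checkpoint layout is project-specific; the sketch below assumes the MoCo state dict is stored under a 'moco' key and that the query encoder's parameters carry the usual encoder_q. prefix:

import torch

def load_moco(base_encoder, args):
    """Load MoCo query-encoder weights into the finetuning encoder (sketch; key names are assumptions)."""
    checkpoint = torch.load(args.load_checkpoint_dir, map_location='cpu')
    moco_state = checkpoint['moco']
    encoder_state = {k.replace('encoder_q.', '', 1): v
                     for k, v in moco_state.items() if k.startswith('encoder_q.')}
    base_encoder.load_state_dict(encoder_state, strict=False)
    return base_encoder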
Example #10
def main():
    """ Main method. """
    args = PARSER.parse_known_args()[0]
    if args.extract_representation and not args.inference:
        print('Error: --extract_representation requires --inference; '
              'add "--inference" to the program call')
        return
    if not args.distributed and args.visible_gpus != 'all':
        os.environ['CUDA_VISIBLE_DEVICES'] = str(args.visible_gpus)
    # sets up the backend for distributed training (optional)
    device, local_rank = setup(distributed=args.distributed)

    # retrieve the dataloaders for the chosen dataset
    dataloaders, args = get_dataloaders(args)

    # make dirs for current experiment logs, summaries etc
    args = experiment_config(args)

    # initialise the model
    # model = resnet.resnet32(args)
    model = models4finetuning.initialize_model(
        model_name=args.use_net,
        num_classes=2,
        feature_extract=args.as_feature_extractor,
        use_pretrained=args.use_pretrained)
    # pretrained models available: resnet, alexnet, vgg, squeezenet, densenet, inception

    # place model onto GPU(s)
    if args.distributed:
        torch.cuda.set_device(device)
        torch.set_num_threads(1)  # n cpu threads / n processes per node
        model = DistributedDataParallel(model.cuda(),
                                        device_ids=[local_rank],
                                        output_device=local_rank)
        # only print stuff from process (rank) 0
        args.print_progress = int(os.environ.get('RANK', '0')) == 0
    else:
        if args.half_precision:
            model.half()  # convert to half precision
            for layer in model.modules():
                # keep batchnorm in 32 for convergence reasons
                if isinstance(layer, nn.BatchNorm2d):
                    layer.float()

        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
        print('\nUsing', torch.cuda.device_count(), 'GPU(s).\n')
        model.to(device)
        args.print_progress = True

    if args.print_progress:
        print_network(model, args)  # prints out the network architecture etc
        logging.info('\ntrain: {} - valid: {} - test: {}'.format(
            len(dataloaders['train'].dataset),
            len(dataloaders['valid'].dataset),
            len(dataloaders['test'].dataset)))

    # launch model training or inference
    if not args.inference:
        train(model, dataloaders, args)

        if args.distributed:  # cleanup
            torch.distributed.destroy_process_group()
    else:

        model.load_state_dict(torch.load(args.load_checkpoint_dir))
        if args.extract_representation:
            test_loss, test_acc, test_sn, test_sp, test_ppv, test_f1score, all_intermediate_features, all_labels, all_predictions, all_output_activations = evaluate(
                model, dataloaders['test'], args)
            name_to_save = '/'.join(
                args.summaries_dir.split('/')[:-1]) + '/extracted_features.mat'
            savemat(
                name_to_save, {
                    'features': all_intermediate_features,
                    'labels': all_labels,
                    'predictions': all_predictions,
                    'activations': all_output_activations
                })
        else:
            test_loss, test_acc, test_sn, test_sp, test_ppv, test_f1score = evaluate(
                model, dataloaders['test'], args)

        logging.info(
            f'[Test] loss {test_loss:.4f} - acc: {test_acc[0]:.4f} - sn: {test_sn:.4f} - sp: {test_sp:.4f} - ppv: {test_ppv:.4f} - F1: {test_f1score:.4f}'
        )
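The representations written by savemat above can be read back for offline analysis with scipy.io.loadmat; a short illustrative snippet (the filename is whatever name_to_save resolved to at run time):

from scipy.io import loadmat

data = loadmat('extracted_features.mat')            # path produced by the run above
print(data['features'].shape, data['labels'].shape)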