Example #1
def enable_accimage() -> None:
    if is_accimage_available():
        import torchvision

        torchvision.set_image_backend("accimage")
    else:
        logger.warning("accimage is not available")
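Example #1 leaves is_accimage_available and logger undefined; a minimal sketch of both, assuming the usual import-probe pattern for optional dependencies:

import logging

logger = logging.getLogger(__name__)


def is_accimage_available() -> bool:
    # Probe for the optional accimage package without raising.
    try:
        import accimage  # noqa: F401
        return True
    except ImportError:
        return False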
Example #2
 def __init__(self, txt_file, root_dir, transform=None):
     """
     Args:
         txt_file (string): Path to the txt file with annotations.
         root_dir (string): Directory with all the images.
         transform (callable, optional): Optional transform to be applied
             on a sample.
     """
     torchvision.set_image_backend('accimage')
     self.name_list = np.loadtxt(os.path.join(root_dir['image'], txt_file), dtype="str", delimiter=',')
     self.root_dir = root_dir
     self.img_root_dir = root_dir['image']
     self.part_root_dir = root_dir['parts']
     self.transform = transform
     self.label_name = {
           'eyebrow1',
           'eyebrow2',
           'eye1',
           'eye2',
           'nose',
           'mouth'
     }
     self.parts_range = {
         'eyebrow1': range(2, 3),
         'eyebrow2': range(3, 4),
         'eye1': range(4, 5),
         'eye2': range(5, 6),
         'nose': range(6, 7),
         'mouth': range(7, 10)
     }
Example #3
    def __init__(self,
                 words_limit,
                 base_size=64,
                 stage_num=3,
                 trans_norm=None,
                 mode="train",
                 use_acc=False):
        self.words_limit = words_limit
        self.stage_num = stage_num
        self.mode = mode
        self.use_acc = use_acc
        self.img_insts = None

        if use_acc:
            set_image_backend('accimage')
        img_size = base_size * (2 ** (stage_num - 1))  # resolution doubles at each stage
        first_size = int(img_size * 76 / 64)
        self.first_resize = transforms.Resize(first_size)
        self.trans_random = transforms.Compose([
            transforms.RandomCrop(img_size),
            transforms.RandomHorizontalFlip(),
        ])
        if trans_norm is None:
            trans_norm = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        self.trans_norm = transforms.Compose([
            transforms.ToTensor(),
            trans_norm,
        ])
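Assuming the exponential size schedule above (2 ** (stage_num - 1)), the per-stage resolutions for base_size=64 work out to 64, 128, and 256:

base_size = 64
for stage_num in (1, 2, 3):
    print(base_size * (2 ** (stage_num - 1)))  # 64, 128, 256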
Example #4
 def __init__(self, txt_file, img_root_dir, part_root_dir, transform=None):
     """
     Args:
         txt_file (string): Path to the txt file with annotations.
         root_dir (string): Directory with all the images.
         transform (callable, optional): Optional transform to be applied
             on a sample.
     """
     torchvision.set_image_backend('accimage')
     self.name_list = np.loadtxt(os.path.join(img_root_dir, txt_file),
                                 dtype="str",
                                 delimiter=',')
     self.img_root_dir = img_root_dir
     self.part_root_dir = part_root_dir
     self.transform = transform
     self.label_name = {
         2: 'eyebrow1',
         3: 'eyebrow2',
         4: 'eye1',
         5: 'eye2',
         6: 'nose',
         7: 'mouth',
         8: 'mouth',
         9: 'mouth'
     }
Example #5
def get_default_image_loader():
    torchvision.set_image_backend('accimage')
    from torchvision import get_image_backend
    if get_image_backend() == 'accimage':
        return accimage_loader
    else:
        return pil_loader
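Example #5 assumes accimage_loader and pil_loader exist in the same module; a sketch modeled on the loaders in torchvision's datasets/folder.py:

def pil_loader(path):
    from PIL import Image
    # Open via a file handle and convert to RGB, as torchvision's loader does.
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert('RGB')


def accimage_loader(path):
    import accimage  # requires the accimage package
    try:
        return accimage.Image(path)
    except IOError:
        # accimage failed to decode the file; fall back to PIL
        return pil_loader(path)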
Example #6
def main(model_path: str,
         cropped_images_dir: str,
         output_csv_path: str,
         detections_json_path: Optional[str],
         classifier_categories_json_path: Optional[str],
         img_size: int,
         batch_size: int,
         num_workers: int) -> None:
    """Main function."""
    # evaluating with accimage is much faster than Pillow or Pillow-SIMD
    tv.set_image_backend('accimage')

    # create dataset
    print('Creating data loader')
    loader = create_loader(
        cropped_images_dir, detections_json_path=detections_json_path,
        img_size=img_size, batch_size=batch_size, num_workers=num_workers)

    label_names = None
    if classifier_categories_json_path is not None:
        with open(classifier_categories_json_path, 'r') as f:
            categories = json.load(f)
        label_names = [categories[str(i)] for i in range(len(categories))]

    # create model
    print('Loading saved model')
    model = torch.jit.load(model_path)
    model, device = train_classifier.prep_device(model)

    test_epoch(model, loader, device=device, label_names=label_names,
               output_csv_path=output_csv_path)
Example #7
def accimage_loader(path):
    try:
        import accimage
        return accimage.Image(path)
    except ModuleNotFoundError:
        # accimage is not installed; switch back to the PIL backend
        torchvision.set_image_backend('PIL')
        return pil_loader(path)
Example #8
def accimage_loader(path):
    torchvision.set_image_backend('accimage')
    import accimage
    try:
        return accimage.Image(path)
    except IOError:
        # Potentially a decoding problem, fall back to PIL.Image
        return pil_loader(path)
Example #9
def main(args, config):
    # Global flags
    torch.manual_seed(0)
    set_image_backend(args.image_backend)
    set_video_backend(args.video_backend)

    task = build_task(config)

    # Load checkpoint, if available.
    checkpoint = load_checkpoint(args.checkpoint_load_path)
    task.set_checkpoint(checkpoint)

    # Load a checkpoint containing a pre-trained model. This is how we
    # implement fine-tuning of existing models.
    pretrained_checkpoint = load_checkpoint(args.pretrained_checkpoint_path)
    if pretrained_checkpoint is not None:
        assert isinstance(
            task, FineTuningTask
        ), "Can only use a pretrained checkpoint for fine tuning tasks"
        task.set_pretrained_checkpoint(pretrained_checkpoint)

    # Configure hooks to do tensorboard logging, checkpoints and so on
    task.set_hooks(configure_hooks(args, config))

    use_gpu = None
    if args.device is not None:
        use_gpu = args.device == "gpu"
        assert torch.cuda.is_available() or not use_gpu, "CUDA is unavailable"

    # LocalTrainer is used for a single node. DistributedTrainer will setup
    # training to use PyTorch's DistributedDataParallel.
    trainer_class = {
        "none": LocalTrainer,
        "ddp": DistributedTrainer
    }[args.distributed_backend]

    trainer = trainer_class(use_gpu=use_gpu,
                            num_dataloader_workers=args.num_workers)

    logging.info(f"Starting training on rank {get_rank()} worker. "
                 f"World size is {get_world_size()}")
    # That's it! When this call returns, training is done.
    trainer.train(task)

    output_folder = Path(args.checkpoint_folder).resolve()
    logging.info("Training successful!")
    logging.info(
        f'Results of this training run are available at: "{output_folder}"')
Example #10
def main(args, config):
    # Global flags
    torch.manual_seed(0)
    set_image_backend(args.image_backend)
    set_video_backend(args.video_backend)

    task = build_task(config)

    # Load checkpoint, if available.
    if args.checkpoint_load_path:
        task.set_checkpoint(args.checkpoint_load_path)

    # Load a checkpoint containing a pre-trained model. This is how we
    # implement fine-tuning of existing models.
    if args.pretrained_checkpoint_path:
        assert isinstance(
            task, FineTuningTask
        ), "Can only use a pretrained checkpoint for fine tuning tasks"
        task.set_pretrained_checkpoint(args.pretrained_checkpoint_path)

    # Configure hooks to do tensorboard logging, checkpoints and so on.
    # `configure_hooks` adds default hooks, while extra hooks can be specified
    # in config file and stored in `task.hooks`. Here, we merge them when we
    # set the final hooks of the task.
    task.set_hooks(configure_hooks(args, config) + task.hooks)

    # LocalTrainer is used for a single replica. DistributedTrainer will setup
    # training to use PyTorch's DistributedDataParallel.
    trainer_class = {
        "none": LocalTrainer,
        "ddp": DistributedTrainer
    }[args.distributed_backend]

    trainer = trainer_class()

    logging.info(f"Starting training on rank {get_rank()} worker. "
                 f"World size is {get_world_size()}")
    # That's it! When this call returns, training is done.
    trainer.train(task)

    output_folder = Path(args.checkpoint_folder).resolve()
    logging.info("Training successful!")
    logging.info(
        f'Results of this training run are available at: "{output_folder}"')
Example #11
def accimage_loader(path):
    # Activate the accimage backend through torchvision (otherwise the
    # package is reported as missing)
    import torchvision
    torchvision.set_image_backend('accimage')

    import accimage
    try:
        return accimage.Image(path)
    except IOError:
        # Potentially a decoding problem, fall back to PIL.Image
        return pil_loader(path)
Example #12
 def __init__(
     self,
     *,
     image_backend: Optional[str] = None,
     resize: bool = True,
     normalize: bool = True,
     min_size: int = 800,
     max_size: int = 1333,
     pixel_mean: Tuple[float, float, float] = (0.485, 0.456, 0.406),
     pixel_std: Tuple[float, float, float] = (0.229, 0.224, 0.225),
     size_divisibility: int = 32,
     **kwargs,
 ) -> None:
     super().__init__(size_divisibility=size_divisibility, **kwargs)
     if image_backend is not None:
         torchvision.set_image_backend(image_backend)
     self.resize = resize
     self.normalize = normalize
     self.min_size = min_size
     self.max_size = max_size
     self.pixel_mean = pixel_mean
     self.pixel_std = pixel_std
Example #13
def dls_from_pytorch(
        train_data_path: Union[str, PosixPath],
        val_data_path: Union[str, PosixPath],
        train_tfms: List,
        val_tfms: List,
        batch_size: int,
        num_workers: int,
        dataset_func: Callable = ImageFolderDataset,
        loader: Callable = default_loader,
        image_backend: str = 'pil',  # 'accimage'
        limit_dataset: Union[bool, int] = False,
        pin_memory: bool = True,
        shuffle: bool = True,
        shuffle_val: bool = False,
        drop_last: bool = True,
        drop_last_val: bool = False,
        persistent_workers: bool = False):
    """Return fastai dataloaders created from pytorch dataloaders.

    Args:
        train_data_path (Union[str, PosixPath]): path for train data.
        val_data_path (Union[str, PosixPath]): path for validation data.
        train_tfms (List): List of transforms for train data.
        val_tfms (List): List of transforms for validation data.
        batch_size (int): Batch size.
        num_workers (int): Number of workers.
        dataset_func (Callable, optional): Function or class to create the dataset. Defaults to ImageFolderDataset.
        loader (Callable, optional): Function that loads an image. Defaults to default_loader.
        image_backend (str, optional): Image backend to use. Defaults to 'pil'.
        limit_dataset (Union[bool, int], optional): If set, limit the dataset to this many items. Defaults to False.
        pin_memory (bool, optional): Use pinned memory. Defaults to True.
        shuffle (bool, optional): Shuffle the train data. Defaults to True.
        shuffle_val (bool, optional): Shuffle the validation data. Defaults to False.
        drop_last (bool, optional): Drop the last train batch if it is not full. Defaults to True.
        drop_last_val (bool, optional): Drop the last validation batch if it is not full. Defaults to False.
        persistent_workers (bool, optional): Use persistent workers. Defaults to False.

    Returns:
        fastai dataloaders
    """
    set_image_backend(image_backend)
    train_tfms = T.Compose(train_tfms)
    val_tfms = T.Compose(val_tfms)
    train_ds = dataset_func(root=train_data_path,
                            transform=train_tfms,
                            loader=loader,
                            limit_dataset=limit_dataset)
    val_ds = dataset_func(root=val_data_path,
                          transform=val_tfms,
                          loader=loader,
                          limit_dataset=limit_dataset)

    train_loader = DataLoader(dataset=train_ds,
                              batch_size=batch_size,
                              num_workers=num_workers,
                              pin_memory=pin_memory,
                              shuffle=shuffle,
                              drop_last=drop_last,
                              persistent_workers=persistent_workers)
    val_loader = DataLoader(dataset=val_ds,
                            batch_size=batch_size,
                            num_workers=num_workers,
                            pin_memory=pin_memory,
                            shuffle=shuffle_val,
                            drop_last=drop_last_val,
                            persistent_workers=persistent_workers)
    return DataLoaders(train_loader, val_loader)
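A hypothetical call with placeholder paths and simple torchvision transforms (the 'accimage' backend assumes the accimage package is installed):

import torchvision.transforms as T

dls = dls_from_pytorch(
    train_data_path='data/train',  # placeholder path
    val_data_path='data/val',      # placeholder path
    train_tfms=[T.RandomResizedCrop(224), T.RandomHorizontalFlip(), T.ToTensor()],
    val_tfms=[T.Resize(256), T.CenterCrop(224), T.ToTensor()],
    batch_size=64,
    num_workers=8,
    image_backend='accimage',
)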
Example #14
def main(r, res_root):
    datasets = [
        'kinetics', 'mini_kinetics', 'activitynet', 'ucf101', 'hmdb51', 'mit',
        'breakfast', 'mini_breakfast', 'movingmnist',
        'movingmnist_blackframes', 'movingmnist_longterm'
    ]
    models = ['resnet', 'vidbagnet', 'vidbagnet_tem']
    datasets_info_file = "datasets_info.csv"
    #datasets_info = read_dataset_info(datasets_info_file)

    #model_configs = model_parameters(r, datasets, models, datasets_info)

    #print(model_configs.dataset)
    #print(model_configs.model)
    #print(model_configs.model_size)
    #print('num_frames', model_configs.num_frames, 't_stride',\
    #      model_configs.t_stride, 'size', model_configs.size,\
    #          'bs', model_configs.bs)
    #print("num_run", model_configs.num_run)
    checkpoints = [f for f in os.listdir(res_root + r) if "pth" in f]
    checkpoints.sort(key=lambda x: int(x.split("_")[1].split(".")[0]))
    '''{"root_path": "/tudelft.net/staff-bulk/ewi/insy/VisionLab/ombrettastraff", 
    "video_path": "/tudelft.net/staff-bulk/ewi/insy/VisionLab/ombrettastraff/movingMNIST/movingmnistdata", 
    "annotation_path": "/tudelft.net/staff-bulk/ewi/insy/VisionLab/ombrettastraff/movingMNIST/movingmnistdata/mnist_json.json", 
    "result_path": "/tudelft.net/staff-bulk/ewi/insy/VisionLab/ombrettastraff/3D-ResNets-PyTorch/results/movingmnist_resnet_18_32frames_32size_bs16_1", 
    "dataset": "movingmnist", "n_classes": 10, "n_pretrain_classes": 10,
    "pretrain_path": "/tudelft.net/staff-bulk/ewi/insy/VisionLab/ombrettastraff/3D-ResNets-PyTorch/results/movingmnist_resnet_18_32frames_32size_bs16_1/save_100.pth", 
    "ft_begin_module": "3D-ResNets-PyTorch/results/movingmnist_resnet_18_32frames_32size_bs16_1/save_100.pth", 
    "sample_size": 32, "sample_duration": 32, "sample_t_stride": 1, "train_crop": "center", "train_crop_min_scale": 0.25, 
    "train_crop_min_ratio": 0.75, "no_hflip": false, "colorjitter": false, "train_t_crop": "random", "learning_rate": 0.1, 
    "momentum": 0.9, "dampening": 0.0, "weight_decay": 0.001, "mean_dataset": "kinetics", "no_mean_norm": false, "no_std_norm": 
        false, "value_scale": 1, "nesterov": false, "optimizer": "sgd", "lr_scheduler": "multistep", "multistep_milestones": [50, 100, 150], 
        "overwrite_milestones": false, "plateau_patience": 10, "batch_size": 128, "inference_batch_size": 1, 
        "batchnorm_sync": false, "n_epochs": 200, "n_val_samples": 3, 
        "resume_path": "/tudelft.net/staff-bulk/ewi/insy/VisionLab/ombrettastraff/3D-ResNets-PyTorch/results/movingmnist_resnet_18_32frames_32size_bs16_1/save_100.pth", 
        "no_train": true, "no_val": true, "inference": true, "inference_subset": "test", "inference_stride": 2, 
        "inference_crop": "center", "inference_no_average": false, "no_cuda": false, "n_threads": 4, 
        "checkpoint": 10, "model": "resnet", "model_depth": 18, "receptive_size": 9, "conv1_t_size": 7,
        "conv1_t_stride": 1, "no_max_pool": false, "resnet_shortcut": "B", "resnet_widen_factor": 1.0, 
        "wide_resnet_k": 2, "resnext_cardinality": 32, "input_type": "rgb", "manual_seed": 1, 
        "accimage": false, "output_topk": 1, "file_type": "jpg", "tensorboard": true,
        "distributed": false, "dist_url": "tcp://127.0.0.1:23456", "world_size": -1, 
        "n_finetune_classes": 10, "arch": "resnet-18", "begin_epoch": 1, "mean": [0.4345, 0.4051, 0.3775], 
        "std": [0.2768, 0.2713, 0.2737], "n_input_channels": 3}'''

    model_results = {}

    if os.path.exists(os.path.join(res_root, r, "opts.json")):
        with open(os.path.join(res_root, r, "opts.json"), "r") as f:
            model_opts = json.load(f)

        print("Testing", r)

        for c in checkpoints[round(len(checkpoints) * 3 / 4):]:

            epoch = c.split("_")[1].split(".")[0]
            print(c, epoch)
            input_text = "--root_path=" + model_opts["root_path"] + \
                " --video_path=" + model_opts["video_path"] + \
                " --annotation_path=" + model_opts["annotation_path"] + \
                " --dataset=" + model_opts["dataset"] + \
                " --n_classes=" + str(model_opts["n_classes"]) + \
                " --sample_size=" + str(model_opts["sample_size"]) + \
                " --sample_duration=" + str(model_opts["sample_duration"]) + \
                " --sample_t_stride=" + str(model_opts["sample_t_stride"]) + \
                " --train_crop=" + model_opts["train_crop"] + \
                " --train_t_crop=" + model_opts["train_t_crop"] + \
                " --value_scale=" + str(model_opts["value_scale"]) + \
                " --inference_batch_size=1 " + \
                " --inference_subset=" + model_opts["inference_subset"] + \
                " --inference_stride=" + str(model_opts["sample_t_stride"]) + \
                " --inference_crop=" + model_opts["train_crop"] + \
                " --n_threads=4 " + \
                " --model=" + model_opts["model"] + \
                " --model_depth=" + str(model_opts["model_depth"]) + \
                " --receptive_size=" + str(model_opts["receptive_size"]) + \
                " --output_topk=1 --file_type=" + model_opts["file_type"] + \
                " --ft_begin_module=3D-ResNets-PyTorch/" + res_root+r+"/"+c + \
                " --result_path=" + model_opts["result_path"] + \
                " --no_train --no_val --inference" + \
                " --n_pretrain_classes=" + str(model_opts["n_classes"]) + \
                " --pretrain_path=3D-ResNets-PyTorch/" + res_root+r+"/"+c + \
                " --resume_path=3D-ResNets-PyTorch/" + res_root+r+"/"+c

            opt = get_opt(arguments_string=input_text, save=False)

            opt.device = torch.device(
                'cpu' if model_opts["no_cuda"] else 'cuda')
            if not opt.no_cuda:
                cudnn.benchmark = True
            if opt.accimage:
                torchvision.set_image_backend('accimage')

            opt.ngpus_per_node = torch.cuda.device_count()
            inference_results = main_worker(-1, opt)

            model_results['epoch_' + epoch] = inference_results
            print('epoch', epoch, 'test acc', inference_results)

        with open(res_root + r + "/checkpoints_test_results.json", "w") as f:
            json.dump(model_results, f)
Example #15
import torch
from .base import Datasets as dataset
from torchvision import transforms, set_image_backend
import random, os
from PIL import Image
import numpy as np
import accimage
set_image_backend('accimage')
from scipy.ndimage import gaussian_filter
import json
import matplotlib.pyplot as plt
mycmap = plt.cm.get_cmap('jet')


class DSAttDatasets(dataset):
    def __init__(self,
                 args,
                 dataset_root,
                 ground_truth,
                 typ,
                 sample_duration=16,
                 sample_size=224,
                 phase='train'):
        super(DSAttDatasets,
              self).__init__(args, dataset_root, ground_truth, typ,
                             sample_duration, sample_size, phase)

    def image_propose(self, data_path, sl):
        sample_size = self.sample_size
        if self.phase == 'train':
            resize = eval(self.args.resize)
Example #16
from os import listdir as ld
from os.path import join as pj
import time

import h5py
from PIL import Image
from tqdm import tqdm
import numpy as np
import nonechucks as nc

import torch
import torch.utils.data as data
from torchvision import set_image_backend
import torchvision.transforms as transforms

set_image_backend("accimage")


def has_file_allowed_extension(filename, extensions):
    """Checks if a file is an allowed extension.

    Args:
        filename (string): path to a file
        extensions (iterable of strings): extensions to consider (lowercase)

    Returns:
        bool: True if the filename ends with one of the given extensions
    """
    filename_lower = filename.lower()
    return any(filename_lower.endswith(ext) for ext in extensions)
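The check is case-insensitive because the filename is lowercased first:

print(has_file_allowed_extension('cat.JPG', ['.jpg', '.png']))    # True
print(has_file_allowed_extension('notes.txt', ['.jpg', '.png']))  # False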
Example #17
def main(r,
         res_root,
         checkpoint_epoch=0,
         test_data_path='',
         annotation_path=''):

    checkpoint = "save_" + str(checkpoint_epoch) + ".pth"

    if os.path.exists(os.path.join(res_root, r, "opts.json")):
        with open(os.path.join(res_root, r, "opts.json"), "r") as f:
            model_opts = json.load(f)
        print(model_opts)
        print("Testing", r, checkpoint)

        model_opts["n_classes"] = 3

        if test_data_path == '':
            test_data_path = model_opts["video_path"]
        if annotation_path == '':
            annotation_path = model_opts["annotation_path"]
        print("Test set:", test_data_path, annotation_path)

        input_text = "--root_path=" + model_opts["root_path"] + \
            " --video_path=" + test_data_path + \
            " --annotation_path=" + annotation_path + \
            " --dataset=" + model_opts["dataset"] + \
            " --n_classes=" + str(model_opts["n_classes"]) + \
            " --sample_size=" + str(model_opts["sample_size"]) + \
            " --sample_duration=" + str(model_opts["sample_duration"]) + \
            " --sample_t_stride=" + str(model_opts["sample_t_stride"]) + \
            " --train_crop=" + model_opts["train_crop"] + \
            " --train_t_crop=" + model_opts["train_t_crop"] + \
            " --value_scale=" + str(model_opts["value_scale"]) + \
            " --inference_batch_size=1 " + \
            " --inference_subset=" + model_opts["inference_subset"] + \
            " --inference_stride=" + str(model_opts["sample_t_stride"]) + \
            " --inference_crop=" + model_opts["train_crop"] + \
            " --n_threads=4 " + \
            " --model=" + model_opts["model"] + \
            " --model_depth=" + str(model_opts["model_depth"]) + \
            " --receptive_size=" + str(model_opts["receptive_size"]) + \
            " --output_topk=1 --file_type=" + model_opts["file_type"] + \
            " --ft_begin_module=3D-ResNets-PyTorch/" + res_root+r+"/"+checkpoint + \
            " --result_path=" + model_opts["result_path"] + \
            " --no_train --no_val --inference" + \
            " --n_pretrain_classes=" + str(model_opts["n_classes"]) + \
            " --pretrain_path=3D-ResNets-PyTorch/" + res_root+r+"/"+checkpoint + \
            " --resume_path=3D-ResNets-PyTorch/" + res_root+r+"/"+checkpoint

        opt = get_opt(arguments_string=input_text, save=False)

        opt.device = torch.device('cpu' if model_opts["no_cuda"] else 'cuda')
        if not opt.no_cuda:
            cudnn.benchmark = True
        if opt.accimage:
            torchvision.set_image_backend('accimage')

        opt.ngpus_per_node = torch.cuda.device_count()
        inference_results = main_worker(-1, opt)

        print(r, checkpoint, 'test acc', inference_results)
        print("Test set:", test_data_path, annotation_path)

    else:
        print("Opts does not exist.")
Example #18
import argparse
import os

import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms

parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
parser.add_argument('--data', metavar='PATH', required=True,
                    help='path to dataset')
parser.add_argument('--nThreads', '-j', default=2, type=int, metavar='N',
                    help='number of data loading threads (default: 2)')
parser.add_argument('--batchSize', '-b', default=256, type=int, metavar='N',
                    help='mini-batch size (1 = pure stochastic) Default: 256')
parser.add_argument('--accimage', action='store_true',
                    help='use accimage')


if __name__ == "__main__":
    args = parser.parse_args()

    if args.accimage:
        torchvision.set_image_backend('accimage')
    print('Using {}'.format(torchvision.get_image_backend()))

    # Data loading code
    transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    train = datasets.ImageFolder(traindir, transform)
    val = datasets.ImageFolder(valdir, transform)
Example #19
def main(base_dir='/mnt/hdd/fast20/jpeg/flickr2500',
         ext='jpg',
         num_workers=8,
         sort_fie=False,
         smart=False,
         batch_size=64,
         verbose=False,
         use_accimage=True,
         expname=None,
         loader_workers=None):

    assert ext == 'jpg' or not use_accimage, "accimage only works for jpg"

    if loader_workers is None:
        loader_workers = num_workers

    if verbose:
        logzero.loglevel(logging.DEBUG)

    # prepare CPU affinity
    assert num_workers == 1 or num_workers % 2 == 0, "Must give an even number for num_workers or 1: {}".format(
        num_workers)
    if num_workers > 1:
        cpuset = list(range(CPU_START[0], CPU_START[0] + num_workers // 2)) + \
            list(range(CPU_START[1], CPU_START[1] + num_workers // 2))
    else:
        cpuset = [
            CPU_START[0],
        ]
    logger.info("cpuset: {}".format(cpuset))
    psutil.Process().cpu_affinity(cpuset)

    # prepare paths
    paths = list(recursive_glob(base_dir, '*.{}'.format(ext)))
    if sort_fie:
        logger.info("Sorting paths")
        paths = sorted(paths, key=get_fie_physical_start)
    else:
        # deterministic pseudo-random
        random.seed(42)
        random.shuffle(paths)
    logger.info("Total {} paths".format(len(paths)))

    if use_accimage:
        torchvision.set_image_backend('accimage')

    trn_name = 'trn10'  # taipei-scrubbing.py in Blazeit
    # trn_name = 'trn18'  # end2end.py in Blazeit
    trn_name_to_layers = \
        [('trn10', [1, 1, 1, 1]),
         ('trn18', [2, 2, 2, 2]),
         ('trn34', [3, 4, 6, 3])]
    trn_name_to_layers = dict(trn_name_to_layers)

    model = PytorchResNet(trn_name_to_layers[trn_name],
                          num_classes=2,
                          conv1_size=3,
                          conv1_pad=1,
                          nbf=16,
                          downsample_start=False)
    model.cuda()

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # prepare preprocessing pipeline
    if smart:
        # do resizing using OpenCV in ImageDataSet
        # because ndarray -> PIL conversion is an overhead
        preprocess = transforms.Compose([transforms.ToTensor(), normalize])
    else:
        preprocess = transforms.Compose(
            [transforms.Resize(RESOL),
             transforms.ToTensor(), normalize])

    manager = mp.Manager()
    context = Context(manager, qsize=len(paths) + 1)

    # hack for smart batch and baseline-sorted: enqueue all paths up front to force sequential access
    for p in paths:
        context.q.put(p)

    image_dataset = ImageDataset(paths,
                                 context,
                                 transform=preprocess,
                                 smart=smart,
                                 sort_fie=sort_fie)
    loader = torch.utils.data.DataLoader(image_dataset,
                                         batch_size=batch_size,
                                         shuffle=False,
                                         num_workers=loader_workers,
                                         pin_memory=False)

    logger.info("warm up with a fake batch")
    fake_batch = torch.zeros([batch_size, 3] + list(RESOL),
                             dtype=torch.float32)
    fake_batch = fake_batch.cuda()
    print(fake_batch.shape, fake_batch.dtype)
    _ = model(fake_batch)

    # zf: use a separate queue to pre-fetch batches, phew ....
    # batch_q = Queue.Queue(100)
    # def batch_prefetcher(loader, q):
    #     for i, image_tensor in enumerate(loader):
    #         q.put(image_tensor)
    #         logger.info("Prefetched batch {}".format(i))
    #     logger.info("Loader finish.")
    #     q.put(None)

    # prefetcher_thread = threading.Thread(target=batch_prefetcher, args=(loader, batch_q))
    # prefetcher_thread.daemon = True
    # prefetcher_thread.start()

    loaderit = iter(loader)
    logger.info("Type of iter(loader): {}".format(type(loaderit).__name__))

    tic = time.time()
    tic_cpu = time.process_time()
    num_batches = 0
    last_batch_time = tic
    elapsed_gpu = 0.

    for _ in range(int(len(paths) / batch_size)):

        idx, data = loaderit._get_data()
        loaderit.tasks_outstanding -= 1
        loaderit._try_put_index()
        logger.info("Get internal batch {}".format(idx))

        image_tensor = data

        image_tensor = image_tensor.cuda()

        tic_gpu = time.time()
        output = model(image_tensor)
        now = time.time()

        logger.info("Run batch {} in {:.3f} ms".format(
            num_batches, 1000 * (now - last_batch_time)))
        logger.info("Batch GPU time: {:.3f} ms".format(1000 * (now - tic_gpu)))

        last_batch_time = now
        elapsed_gpu += (now - tic_gpu)
        num_batches += 1
        # logger.info("loaderiter.task_outstanding: {}".format(datait.tasks_outstanding))

    elapsed = time.time() - tic
    elapsed_cpu = time.process_time() - tic_cpu
    elapsed_cpu += context.stats['cpu_time']

    logger.info("# batches: {}".format(num_batches))
    logger.info(
        "GPU time per batch {:.3f} ms, GPU time per image {:.3f} ms".format(
            1000 * elapsed_gpu / num_batches,
            1000 * elapsed_gpu / num_batches / batch_size))

    num_items = len(paths)
    bytes_from_disk = context.stats['bytes_from_disk']

    logger.info(
        "Elapsed {:.3f} ms / image, CPU elapsed {:.3f} ms / image".format(
            1000 * elapsed / num_items, 1000 * elapsed_cpu / num_items))
    logger.info(str(context.stats))

    keys_dict = {
        'expname': expname,
        'basedir': base_dir,
        'ext': ext,
        'num_workers': num_workers,
        'hostname': this_hostname
    }
    vals_dict = {
        'num_items': num_items,
        'avg_wall_ms': 1e3 * elapsed / num_items,
        'avg_cpu_ms': 1e3 * elapsed_cpu / num_items,
        'avg_mbyteps': bytes_from_disk * 1e-6 / elapsed,
    }

    logger.info(str(keys_dict))
    logger.info(str(vals_dict))

    if expname:
        sess = dbutils.get_session()
        dbutils.insert_or_update_one(sess,
                                     dbmodles.EurekaExp,
                                     keys_dict=keys_dict,
                                     vals_dict=vals_dict)
        sess.commit()
        sess.close()
Example #20
from PIL import Image
CV2PIL = {
    cv2.INTER_NEAREST: Image.NEAREST,  # NONE
    cv2.INTER_LANCZOS4: Image.LANCZOS,  # ANTIALIAS
    cv2.INTER_LINEAR: Image.BILINEAR,
    cv2.INTER_CUBIC: Image.BICUBIC,  # CUBIC
}

from . import sys, logging

try:
    import torchvision as tv
except ImportError as e:
    pass
else:
    tv.set_image_backend('accimage')

# PIL functions

PIL_EXIF_TAGS = {}


def PIL_exif_tag(value='Orientation'):
    if value not in PIL_EXIF_TAGS:
        from PIL import ExifTags
        for key in ExifTags.TAGS.keys():
            if ExifTags.TAGS[key] == value:
                PIL_EXIF_TAGS[value] = key
                break
    return PIL_EXIF_TAGS.get(value, None)
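A usage sketch: look up the cached Orientation tag id and read it from an image's EXIF data ('photo.jpg' is a placeholder path; getexif is Pillow's EXIF accessor):

from PIL import Image

img = Image.open('photo.jpg')  # placeholder path
orientation = img.getexif().get(PIL_exif_tag('Orientation'))
print(orientation)  # e.g. 1 = upright, 6 = rotated 90 degrees clockwise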
Example #21
def main():
    global args

    model = nn.Conv2d(3, 1, 1)

    model = model.cuda()

    if args.loader == 'pil' or args.loader == 'accimage':

        if args.loader == 'accimage':
            import torchvision
            torchvision.set_image_backend(args.loader)

        from torchvision import transforms, datasets
        from torch.utils.data import DataLoader

        dataset = datasets.ImageFolder(
            args.data,
            transform=transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
            ])
        )

        loader = DataLoader(
            dataset,
            batch_size=args.batch_size,
            num_workers=args.num_workers,
            pin_memory=True
        )

    elif args.loader == 'dali-cpu':
        pass
    elif args.loader == 'dali-gpu':
        pass
    elif args.loader == 'opencv':
        from opencv_transforms import opencv_transforms as transforms
        from torch.utils.data import DataLoader
        from torchvision import datasets
        import cv2
        import numpy as np

        def loader_fn(path: str) -> np.ndarray:
            return cv2.imread(path)

        dataset = datasets.ImageFolder(
            args.data,
            transform=transforms.Compose([
                transforms.Resize((256, 256), interpolation=cv2.INTER_LINEAR),
                transforms.CenterCrop((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
            ]),
            loader=loader_fn
        )

        loader = DataLoader(
            dataset,
            batch_size=args.batch_size,
            num_workers=args.num_workers,
            pin_memory=True
        )

    n = len(dataset) // args.batch_size

    start = time.perf_counter()

    for i in range(2):

        batch_start = time.perf_counter()

        for i, (image, _) in enumerate(loader):
            # _ = model(image)
            _ = image.cuda(non_blocking=True)

            print(f'{i}/{n}')

    torch.cuda.synchronize()

    end = time.perf_counter()

    print(f'Loader {args.loader}: {end - start}')
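Example #21 reads a module-level args that is never parsed in the snippet; a sketch of the parser it presumably relies on, with option names inferred from the attributes used:

import argparse

parser = argparse.ArgumentParser(description='Image loading backend benchmark')
parser.add_argument('--data', required=True, help='path to an ImageFolder-style dataset')
parser.add_argument('--loader', default='pil',
                    choices=['pil', 'accimage', 'dali-cpu', 'dali-gpu', 'opencv'])
parser.add_argument('--batch-size', dest='batch_size', type=int, default=256)
parser.add_argument('--num-workers', dest='num_workers', type=int, default=8)
args = parser.parse_args()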
Example #22
def main():
    parser = argparse.ArgumentParser(
        description=
        'Model training engine for molecular phenotype models from TCGA images'
    )

    parser.add_argument(
        '--Task',
        help='WGD-ALL, WGD, MSI, MSI-SINGLE_LABEL (only implemented tasks)',
        required=True,
        type=str)
    parser.add_argument('--GPU',
                        help='GPU device to use for model training',
                        required=True,
                        type=int)
    parser.add_argument('--n_workers',
                        help='Number of workers to use for dataloaders',
                        required=False,
                        default=12,
                        type=int)
    parser.add_argument('--lr',
                        help='Inital learning rate',
                        required=False,
                        default=1e-4,
                        type=float)
    parser.add_argument('--patience',
                        help='Patience for lr scheduler',
                        required=False,
                        default=10,
                        type=int)
    parser.add_argument('--model_name',
                        help='Path to place saved model state',
                        required=True,
                        type=str)
    parser.add_argument('--batch_size',
                        help='Batch size for training and validation loops',
                        required=False,
                        default=264,
                        type=int)
    parser.add_argument('--epochs',
                        help='Epochs to run training and validation loops',
                        required=False,
                        default=50,
                        type=int)
    parser.add_argument('--magnification',
                        help='Magnification level of tiles',
                        required=False,
                        default='5.0',
                        type=str)
    parser.add_argument('--fine_tune_classifier_only',
                        help='Freeze convolutional layers',
                        action='store_true')
    args = parser.parse_args()

    # https://github.com/pytorch/accimage
    set_image_backend('accimage')
    device = torch.device('cuda', args.GPU)

    # set root dir for images
    if args.Task.upper() == 'WGD-ALL':
        root_dir = data_utils.root_dir_all
    else:
        root_dir = data_utils.root_dir_coad

    # normalize and tensorify jpegs
    transform_train = train_utils.transform_train
    transform_val = train_utils.transform_validation

    # set the task
    # TODO: implement a general for table to perform predictions
    if args.Task.upper() == 'WGD-ALL':
        pickle_file = '/n/tcga_wgd_sa_all_1.0.pkl'
        batch_all, sa_trains, sa_vals = data_utils.load_COAD_train_val_sa_pickle(
            pickle_file=pickle_file,
            return_all_cancers=True,
            split_in_two=False)
        train_cancers = [
            'COAD', 'BRCA', 'READ_10x', 'LUSC_10x', 'BLCA_10x', 'LUAD_10x',
            'STAD_10x', 'HNSC_10x'
        ]
        train_idxs = [batch_all.index(cancer) for cancer in train_cancers]

        val_cancers = [
            'COAD', 'BRCA', 'READ_10x', 'LUSC_10x', 'BLCA_10x', 'LUAD_10x',
            'STAD_10x', 'HNSC_10x'
        ]
        val_idxs = [batch_all.index(cancer) for cancer in val_cancers]

        sa_train = {}
        sa_val = {}
        ct_train = []
        ct_val = []

        for idx, (sa_t, sa_v) in enumerate(zip(sa_trains, sa_vals)):
            if idx in train_idxs:
                sa_train.update(sa_t)
                ct_train.extend([batch_all[idx]] *
                                len(list(sa_trains[idx].keys())))
            if idx in val_idxs:
                sa_val.update(sa_v)
                ct_val.extend([batch_all[idx]] *
                              len(list(sa_vals[idx].keys())))

        train_set = data_utils.TCGADataset_tiles(
            sa_train,
            root_dir,
            transform=transform_train,
            magnification=args.magnification,
            all_cancers=True,
            cancer_type=ct_train)
        val_set = data_utils.TCGADataset_tiles(
            sa_val,
            root_dir,
            transform=transform_val,
            magnification=args.magnification,
            all_cancers=True,
            cancer_type=ct_val)

        jpg_to_sample = val_set.jpg_to_sample
        output_shape = 1
    else:
        if args.Task.upper() == 'MSI':
            sa_train, sa_val = data_utils.process_MSI_data()
            output_shape = 2
        elif args.Task.upper() == 'WGD':
            sa_train, sa_val = data_utils.process_WGD_data()
            output_shape = 1
        elif args.Task.upper() == 'MSI-SINGLE_LABEL':
            sa_train, sa_val = data_utils.process_MSI_data()
            # replace ordinal labels with binary
            for key, value in zip(sa_train.keys(), sa_train.values()):
                sa_train[key] = int(value >= 1)
            for key, value in zip(sa_val.keys(), sa_val.values()):
                sa_val[key] = int(value >= 1)
            output_shape = 1

        # save sample_annotations_train, sample_annotations_val as pickle
        pickle_file = args.model_name[:-3] + '_sa.pkl'
        with open(pickle_file, 'wb') as f:
            pickle.dump([sa_train, sa_val], f)

        train_set = data_utils.TCGADataset_tiles(
            sa_train,
            root_dir,
            transform=transform_train,
            magnification=args.magnification)
        val_set = data_utils.TCGADataset_tiles(
            sa_val,
            root_dir,
            transform=transform_val,
            magnification=args.magnification)
        jpg_to_sample = val_set.jpg_to_sample

    # set weights for random sampling of tiles such that batches are class balanced
    counts = [c[1] for c in sorted(Counter(train_set.all_labels).items())]
    weights = 1.0 / np.array(counts, dtype=float) * 1e3
    reciprocal_weights = []
    for index in range(len(train_set)):
        reciprocal_weights.append(weights[train_set.all_labels[index]])

    batch_size = args.batch_size
    # current WeightedRandomSampler is too slow when replacement = False.
    # TODO: implement switch to weighted loss or weighted sampler
    #sampler = torch.utils.data.sampler.WeightedRandomSampler(reciprocal_weights, len(reciprocal_weights), replacement=True)
    train_loader = DataLoader(train_set,
                              batch_size=batch_size,
                              pin_memory=True,
                              shuffle=True,
                              num_workers=args.n_workers)
    valid_loader = DataLoader(val_set,
                              batch_size=batch_size,
                              pin_memory=True,
                              num_workers=args.n_workers)

    learning_rate = args.lr
    # TODO: allow resnet model specification or introduce other model choices
    resnet = models.resnet18(pretrained=True)
    # TODO: implement flexible solution to these hardcoded values
    if args.fine_tune_classifier_only:
        for param in resnet.parameters():
            param.requires_grad = False
    resnet.fc = nn.Linear(2048, output_shape, bias=True)  #8192
    resnet.cuda(device=device)

    if args.fine_tune_classifier_only:
        optimizer = torch.optim.Adam(resnet.fc.parameters(), lr=learning_rate)
    else:
        optimizer = torch.optim.Adam(resnet.parameters(), lr=learning_rate)

    criterion_train = nn.BCEWithLogitsLoss(reduction='mean')
    criterion_val = nn.BCEWithLogitsLoss(reduction='none')
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=args.patience, min_lr=1e-6)

    best_loss = 1e8
    best_acc = 0.0
    for e in range(args.epochs):
        if e % 10 == 0:
            print('---------- LR: {0:0.8f} ----------'.format(
                optimizer.state_dict()['param_groups'][0]['lr']))
        train_utils.embedding_training_loop(e,
                                            train_loader,
                                            resnet,
                                            criterion_train,
                                            optimizer,
                                            device=device,
                                            task=args.Task.upper())
        val_loss, val_acc = train_utils.embedding_validation_loop(
            e,
            valid_loader,
            resnet,
            criterion_val,
            jpg_to_sample,
            dataset='Val',
            scheduler=scheduler,
            device=device,
            task=args.Task.upper())
        if val_loss < best_loss:
            torch.save(resnet.state_dict(), args.model_name)
            best_loss = val_loss
            best_acc = val_acc
            print('WROTE MODEL')
        elif val_acc > best_acc:
            torch.save(resnet.state_dict(), args.model_name)
            best_acc = val_acc
            best_loss = val_loss
            print('WROTE MODEL')
Example #23
def main():
    parser = argparse.ArgumentParser(
        description=
        'MAML training engine for molecular phenotype models from TCGA images')

    parser.add_argument('--GPU',
                        help='GPU device to use for model training',
                        required=True,
                        type=int)
    parser.add_argument('--n_workers',
                        help='Number of workers to use for dataloaders',
                        required=False,
                        default=12,
                        type=int)
    parser.add_argument('--alpha',
                        help='Inital learning rate for local training',
                        required=False,
                        default=1e-4,
                        type=float)
    parser.add_argument('--eta',
                        help='Inital learning rate for global training',
                        required=False,
                        default=1e-4,
                        type=float)
    parser.add_argument('--patience',
                        help='Patience for lr scheduler',
                        required=False,
                        default=10,
                        type=int)
    parser.add_argument('--model_name',
                        help='Path to place saved model state',
                        required=True,
                        type=str)
    parser.add_argument('--batch_size_train',
                        help='Batch size for training loop',
                        required=False,
                        default=264,
                        type=int)
    parser.add_argument('--batch_size_val',
                        help='Batch size for validation loop',
                        required=False,
                        default=264,
                        type=int)
    parser.add_argument('--epochs',
                        help='Epochs to run training and validation loops',
                        required=False,
                        default=50,
                        type=int)
    parser.add_argument('--magnification',
                        help='Magnification level of tiles',
                        required=False,
                        default='5.0',
                        type=str)
    parser.add_argument('--save_best',
                        help='Metric used to save best model',
                        required=False,
                        default='loss',
                        type=str)
    args = parser.parse_args()

    # setup
    set_image_backend('accimage')
    device = torch.device('cuda', args.GPU)

    # load sample annotations pickle
    pickle_file = '/home/sxchao/MSI_prediction/tcga_project/tcga_wgd_sa_all.pkl'
    batch_all, _, _, sa_trains, sa_vals = data_utils.load_COAD_train_val_sa_pickle(
        pickle_file=pickle_file, return_all_cancers=True, split_in_two=True)
    # normalize and tensorify jpegs
    train_transform = train_utils.transform_train
    val_transform = train_utils.transform_validation

    # initialize Datasets
    train_sets = []
    val_sets = []

    train_cancers = [
        'COAD', 'BRCA', 'READ_10x', 'LUSC_10x', 'BLCA_10x', 'LUAD_10x',
        'STAD_10x', 'HNSC_10x'
    ]
    val_cancers = ['UCEC', 'LIHC_10x', 'KIRC_10x']

    magnification = args.magnification
    root_dir = '/n/mounted-data-drive/'
    for i in range(len(train_cancers)):
        train_set = data_utils.TCGADataset_tiles(
            sa_trains[batch_all.index(train_cancers[i])],
            root_dir + train_cancers[i] + '/',
            transform=train_transform,
            magnification=magnification,
            batch_type='tile')
        train_sets.append(train_set)

    for j in range(len(val_cancers)):
        val_set = data_utils.TCGADataset_tiles(sa_vals[batch_all.index(
            val_cancers[j])],
                                               root_dir + val_cancers[j] + '/',
                                               transform=val_transform,
                                               magnification=magnification,
                                               batch_type='tile',
                                               return_jpg_to_sample=True)
        val_sets.append(val_set)

    # get DataLoaders
    train_loader = torch.utils.data.DataLoader(
        data_utils.MergedDataset(*train_sets),
        batch_size=args.batch_size_train,
        shuffle=True,
        num_workers=args.n_workers,
        pin_memory=True)

    #val_loader = torch.utils.data.DataLoader(data_utils.ConcatDataset(*val_sets, return_jpg_to_sample=True),
    #batch_size=args.batch_size_val,
    #shuffle=True,
    #num_workers=args.n_workers,
    #pin_memory=True)

    val_loaders = [
        torch.utils.data.DataLoader(val_set,
                                    batch_size=args.batch_size_val,
                                    shuffle=True,
                                    num_workers=args.n_workers,
                                    pin_memory=True) for val_set in val_sets
    ]

    # model args
    state_dict_file = '/n/tcga_models/resnet18_WGD_all_10x.pt'
    state_dict_file_maml = '/n/tcga_models/maml_WGD_10x_v03a.pt'
    input_size = 2048
    hidden_size = 512
    output_size = 1

    # initialize trained resnet
    resnet = models.resnet18(pretrained=False)
    resnet.fc = nn.Linear(2048, output_size, bias=True)
    saved_state = torch.load(state_dict_file,
                             map_location=lambda storage, loc: storage)
    resnet.load_state_dict(saved_state)

    # freeze layers
    resnet.fc = model_utils.Identity()
    resnet.cuda(device=device)
    for param in resnet.parameters():
        param.requires_grad = False

    # initialize theta_global
    model_global = model_utils.FeedForward(input_size, hidden_size,
                                           output_size)
    saved_state = torch.load(state_dict_file_maml,
                             map_location=lambda storage, loc: storage)
    model_global.load_state_dict(saved_state)
    model_global.cuda(device=device)
    theta_global = []
    for p in model_global.parameters():
        theta_global.append(p.detach().clone().cuda(device=device))

    #model_global.update_params(theta_global)
    #model_global.linear1.weight = torch.nn.Parameter(theta_global[0])
    #model_global.linear1.bias = torch.nn.Parameter(theta_global[1])
    #model_global.linear2.weight = torch.nn.Parameter(theta_global[2])
    #model_global.linear2.bias = torch.nn.Parameter(theta_global[3])

    # initialize local models, set theta_local = theta_global
    local_models = []
    for i in range(len(train_cancers)):
        local_models.append(
            model_utils.FeedForward(input_size, hidden_size, output_size,
                                    theta_global).cuda(device=device))

    # training params
    num_epochs = args.epochs
    alpha = args.alpha
    eta = args.eta
    patience = args.patience
    factor = 0.1
    patience_count = 0
    previous_loss = 1e8
    best_loss = 1e8
    best_acc = 0.0

    # train meta-learner
    for e in range(num_epochs):
        # reduce LR on plateau
        if patience_count > patience:
            alpha = factor * alpha
            eta = factor * eta
            patience_count = 0
            print('--- LR DECAY --- Alpha: {0:0.8f}, Eta: {1:0.8f}'.format(
                alpha, eta))

        for step, (tiles, labels) in enumerate(train_loader):
            tiles, labels = tiles.cuda(device=device), labels.cuda(
                device=device).float()
            grads, local_models = train_utils.maml_train_local(step,
                                                               tiles,
                                                               labels,
                                                               resnet,
                                                               local_models,
                                                               alpha=alpha,
                                                               device=device)
            theta_global, model_global = train_utils.maml_train_global(
                theta_global, model_global, grads, eta=eta)
            for i in range(len(local_models)):
                local_models[i].update_params(theta_global)

        total_loss, acc, mean_pool_acc = train_utils.maml_validate_all(
            e, resnet, model_global, val_loaders, device=device)

        if total_loss > previous_loss:
            patience_count += 1
        else:
            patience_count = 0
        previous_loss = total_loss

        if args.save_best == 'loss':
            if total_loss < best_loss:
                torch.save(model_global.state_dict(), args.model_name)
                print('--- WROTE MODEL ---')
                best_loss = total_loss
        elif args.save_best == 'acc':
            if mean_pool_acc > best_acc:
                torch.save(model_global.state_dict(), args.model_name)
                print('--- WROTE MODEL ---')
                best_acc = mean_pool_acc
Example #24
def main_worker(gpu, ngpus_per_node, args):
    global best_acc1
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
    # create model
    print("=> creating model '{}'".format(args.arch))
    model = models.__dict__[args.arch](pretrained=True)
    for m in model.modules():
        if isinstance(m, QuantConv2d):
            m.weight_quant = weight_quantize_fn(w_bit=args.bit)
            m.act_grid = build_power_value(args.bit)
            m.act_alq = act_quantization(args.bit, m.act_grid)

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # init from pre-trained model or full-precision model
    if args.pretrained:
        if os.path.isfile(args.pretrained):
            print("=> loading pre-trained model from {}".format(
                args.pretrained))
            checkpoint = torch.load(args.pretrained)
            model.load_state_dict(checkpoint['state_dict'])
            model.module.show_params()
        else:
            print('no pre-trained model found')
            exit()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)
    model_params = []
    for name, params in model.module.named_parameters():
        if 'act_alpha' in name:
            model_params += [{
                'params': [params],
                'lr': 3e-2,
                'weight_decay': 2e-5
            }]
        elif 'wgt_alpha' in name:
            model_params += [{
                'params': [params],
                'lr': 1e-2,
                'weight_decay': 1e-4
            }]
        else:
            model_params += [{'params': [params]}]
    optimizer = torch.optim.SGD(model_params,
                                lr=args.lr,
                                momentum=0.9,
                                weight_decay=args.weight_decay)
    print('Total params: %.2fM' % (sum(p.numel()
                                       for p in model.parameters()) / 1e+6))

    # optionally resume from a checkpoint
    best_acc1 = 0  # overwritten by the checkpointed value when resuming
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # data loader via the official torchvision datasets:
    # --------------------------------------------------------------------------
    print('==> Using PyTorch Dataset')
    input_size = 224  # image resolution for resnets
    import torchvision
    import torchvision.transforms as transforms
    import torchvision.datasets as datasets
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    torchvision.set_image_backend('accimage')
    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.Resize((256, 256)),
            transforms.RandomCrop(input_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    # distributed mode needs a DistributedSampler; train_sampler is also
    # used by set_epoch() in the epoch loop below
    train_sampler = (torch.utils.data.distributed.DistributedSampler(train_dataset)
                     if args.distributed else None)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)
    val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
        valdir,
        transforms.Compose([
            transforms.Resize((256, 256)),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)
    # --------------------------------------------------------------------------
    if args.evaluate:
        validate(val_loader, model, criterion, args)
        return
    writer = SummaryWriter(comment='res18_4bit')

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args, writer)

        # evaluate on validation set
        acc1 = validate(val_loader, model, criterion, args)
        writer.add_scalar('test_acc', acc1, epoch)
        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)
        print('best_acc:' + str(best_acc1))

        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_acc1': best_acc1,
                    'optimizer': optimizer.state_dict(),
                }, is_best)
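Note: torchvision.set_image_backend('accimage') only validates the backend name; the accimage import happens at decode time, so snippets like the one above fail mid-training if the package is missing. A minimal guarded sketch (the helper name is illustrative, not from the snippet):

import importlib.util

import torchvision


def use_accimage_if_available():
    # check for the package up front instead of failing at decode time
    if importlib.util.find_spec('accimage') is not None:
        torchvision.set_image_backend('accimage')
    else:
        print('accimage not installed; keeping the default PIL backend')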
Example #25
    def __init__(self, config_name):
        """
        Args:
            config_name: name of a configuration module to import
        """

        print('Config name: {}'.format(config_name))

        self.cfg = import_config_by_name(config_name)
        print(self.cfg)

        print('Start preparing dataset')
        self.prepare_dataset()
        print('Finished preparing dataset')

        print("torch.__version__=", torch.__version__)
        torchvision.set_image_backend('accimage')
        print("torchvision.get_image_backend()=",
              torchvision.get_image_backend())

        self.epochs_to_train = 500
        self.base_learning_rate = 0.02
        self.lr_scales = (
            (0, 0.1),  # perform soft warm-up to reduce chance of divergence
            (2, 0.2),
            (4, 0.3),
            (6, 0.5),
            (8, 0.7),
            (10, 1.0),  # main learning rate multiplier
            (int(0.90 * self.epochs_to_train), 0.1),
            (int(0.95 * self.epochs_to_train), 0.01),
        )

        self.train_batch_size = 32
        self.val_batch_size = 32

        num_workers_train = 12
        num_workers_val = 12

        input_traits = default_input_traits()

        labelmap = NameListDataset.getLabelmap()

        model = detection_models.SingleShotDetector(self.cfg.backbone_specs,
                                                    self.cfg.multibox_specs,
                                                    input_traits['resolution'],
                                                    labelmap)

        # wrap in DataParallel; flip this flag off to debug on a single device
        use_data_parallel = True
        if use_data_parallel:
            model_dp = torch.nn.DataParallel(model)
            cudnn.benchmark = True
        else:
            model_dp = model

        if torch.cuda.is_available():
            model_dp.cuda()

        self.model = model
        self.model_dp = model_dp

        build_target = BuildTargetFunctor(model)
        map_to_network_input = image_anno_transforms.MapImageAndAnnoToInputWindow(
            input_traits['resolution'])

        def load_list(name):
            path = os.path.join(self.cfg.train_val_split_dir, name + '.pkl')
            with open(path, 'rb') as f:  # avoid shadowing the input() builtin
                return pickle.load(f)

        self.train_dataset = NameListDataset(
            dataset_list=load_list('train_list'),
            image_transform=train_image_transform(),
            image_and_anno_transform=train_image_and_annotation_transform(),
            map_to_network_input=map_to_network_input,
            build_target=build_target)

        self.balanced_val_dataset = NameListDataset(
            dataset_list=load_list('val_list'),
            image_transform=None,
            image_and_anno_transform=None,
            map_to_network_input=map_to_network_input,
            build_target=build_target)

        # Data loading and augmentation pipeline for training
        self.train_loader = torch.utils.data.DataLoader(
            self.train_dataset,
            batch_size=self.train_batch_size,
            shuffle=True,
            num_workers=num_workers_train,
            collate_fn=extended_collate,
            pin_memory=True)

        # Data loading and augmentation pipeline for validation
        self.val_loader = torch.utils.data.DataLoader(
            self.balanced_val_dataset,
            batch_size=self.val_batch_size,
            shuffle=False,
            num_workers=num_workers_val,
            collate_fn=extended_collate,
            pin_memory=True)

        self.optimizer = None
        self.learning_rate = None

        self.train_iter = 0
        self.epoch = 0
        self.best_performance_metric = None

        self.print_freq = 10

        self.writer = None

        self.run_dir = os.path.join('runs', self.cfg.run_name)
        os.makedirs(self.run_dir, exist_ok=True)
        self.snapshot_path = os.path.join(self.run_dir,
                                          self.cfg.run_name + '.pth.tar')

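The lr_scales tuple above is a piecewise-constant multiplier table: a soft warm-up stepping from 0.1 to 1.0 over the first ten epochs, then 10x and 100x decays at 90% and 95% of training. A minimal sketch of one way to apply such a table, assuming the (start_epoch, scale) pair format used above:

def lr_scale_for_epoch(lr_scales, epoch):
    # entries are sorted by start_epoch; the last entry whose
    # start_epoch <= epoch provides the active multiplier
    scale = lr_scales[0][1]
    for start_epoch, multiplier in lr_scales:
        if epoch >= start_epoch:
            scale = multiplier
    return scale


def set_learning_rate(optimizer, base_lr, lr_scales, epoch):
    lr = base_lr * lr_scale_for_epoch(lr_scales, epoch)
    for group in optimizer.param_groups:
        group['lr'] = lr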
Example #26
def main():
    global args, best_prec1
    args = parser.parse_args()

    # create model
    if args.arch == 'alexnet':
        model = model_list.alexnet(pretrained=args.pretrained)
        input_size = 227
    else:
        raise Exception('Model not supported yet')

    if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    else:
        model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    optimizer = torch.optim.Adam(model.parameters(),
                                 args.lr,
                                 weight_decay=args.weight_decay)

    for m in model.modules():
        if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
            c = float(m.weight.data[0].nelement())
            m.weight.data = m.weight.data.normal_(0, 1.0 / c)
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data = m.weight.data.zero_().add(1.0)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            del checkpoint
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code

    if args.caffe_data:
        print('==> Using Caffe Dataset')
        cwd = os.getcwd()
        sys.path.append(cwd + '/../')
        import datasets as datasets
        import datasets.transforms as transforms
        if not os.path.exists(args.data + '/imagenet_mean.binaryproto'):
            print("==> Data directory" + args.data + "does not exits")
            print("==> Please specify the correct data path by")
            print("==>     --data <DATA_PATH>")
            return

        normalize = transforms.Normalize(meanfile=args.data +
                                         '/imagenet_mean.binaryproto')

        train_dataset = datasets.ImageFolder(
            args.data,
            transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
                transforms.RandomSizedCrop(input_size),
            ]),
            Train=True)

        train_sampler = None

        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.batch_size,
                                                   shuffle=False,
                                                   num_workers=args.workers,
                                                   pin_memory=True,
                                                   sampler=train_sampler)

        val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
            args.data,
            transforms.Compose([
                transforms.ToTensor(),
                normalize,
                transforms.CenterCrop(input_size),
            ]),
            Train=False),
                                                 batch_size=args.batch_size,
                                                 shuffle=False,
                                                 num_workers=args.workers,
                                                 pin_memory=True)
    else:
        print('==> Using PyTorch Dataset')
        import torchvision
        import torchvision.transforms as transforms
        import torchvision.datasets as datasets
        traindir = os.path.join(args.data, 'train')
        valdir = os.path.join(args.data, 'val')
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])

        torchvision.set_image_backend('accimage')

        train_dataset = datasets.ImageFolder(
            traindir,
            transforms.Compose([
                transforms.RandomResizedCrop(input_size, scale=(0.40, 1.0)),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ]))

        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   num_workers=args.workers,
                                                   pin_memory=True)
        val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
            valdir,
            transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(input_size),
                transforms.ToTensor(),
                normalize,
            ])),
                                                 batch_size=args.batch_size,
                                                 shuffle=False,
                                                 num_workers=args.workers,
                                                 pin_memory=True)

    print(model)

    # define the binarization operator
    global bin_op
    bin_op = util.BinOp(model)

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, is_best)
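The init loop above draws Conv/Linear weights from a normal distribution whose std is the reciprocal of one output unit's fan-in, and resets BatchNorm scales to 1. The same idea reads more idiomatically with Module.apply; a sketch (zeroing the BatchNorm bias mirrors Example #32 below and is otherwise an assumption):

import torch.nn as nn


def init_weights(m):
    if isinstance(m, (nn.Conv2d, nn.Linear)):
        # std = 1 / fan-in of a single output unit, as in the loop above
        c = float(m.weight.data[0].nelement())
        m.weight.data.normal_(0, 1.0 / c)
    elif isinstance(m, nn.BatchNorm2d):
        m.weight.data.fill_(1.0)
        m.bias.data.zero_()


# model.apply() recurses over every submodule:
# model.apply(init_weights)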
Example #27
def main(params_json_path: str, ckpt_path: str, output_dir: str,
         splits: Sequence[str], target_mapping_json_path: Optional[str] = None,
         label_index_json_path: Optional[str] = None,
         **kwargs: Any) -> None:
    """Main function."""
    # input validation
    assert os.path.exists(params_json_path)
    assert os.path.exists(ckpt_path)
    assert (target_mapping_json_path is None) == (label_index_json_path is None)
    if target_mapping_json_path is not None:
        assert label_index_json_path is not None
        assert os.path.exists(target_mapping_json_path)
        assert os.path.exists(label_index_json_path)

    # evaluating with accimage is much faster than Pillow or Pillow-SIMD
    torchvision.set_image_backend('accimage')

    # create output directory
    if not os.path.exists(output_dir):
        print('Creating output directory:', output_dir)
        os.makedirs(output_dir, exist_ok=True)

    with open(params_json_path, 'r') as f:
        params = json.load(f)
    pprint(params)

    # override saved params with kwargs
    for key, new in kwargs.items():
        if new is None:
            continue
        if key in params:
            saved = params[key]
            print(f'Overriding saved {key}. Saved: {saved}. '
                  f'Override with: {new}.')
        else:
            print(f'Did not find {key} in saved params. Using value {new}.')
        params[key] = new

    model_name: str = params['model_name']
    dataset_dir: str = params['dataset_dir']

    if 'efficientnet' in model_name:
        img_size = efficientnet.EfficientNet.get_image_size(model_name)
    else:
        img_size = 224

    # For now, we don't weight crops by detection confidence during
    # evaluation. But consider changing this.
    print('Creating dataloaders')
    loaders, label_names = train_classifier.create_dataloaders(
        dataset_csv_path=os.path.join(dataset_dir, 'classification_ds.csv'),
        label_index_json_path=os.path.join(dataset_dir, 'label_index.json'),
        splits_json_path=os.path.join(dataset_dir, 'splits.json'),
        cropped_images_dir=params['cropped_images_dir'],
        img_size=img_size,
        multilabel=params['multilabel'],
        label_weighted=params['label_weighted'],
        weight_by_detection_conf=False,
        batch_size=params['batch_size'],
        num_workers=params['num_workers'],
        augment_train=False)
    num_labels = len(label_names)

    # create model, compile with TorchScript if given checkpoint is not compiled
    print('Loading model from checkpoint')
    try:
        model = torch.jit.load(ckpt_path, map_location='cpu')
    except RuntimeError:
        compiled_path = trace_model(model_name, ckpt_path, num_labels, img_size)
        model = torch.jit.load(compiled_path, map_location='cpu')
    model, device = train_classifier.prep_device(model)

    if len(splits) == 0:
        print('No splits given! Exiting.')
        return

    target_cols_map = None
    if target_mapping_json_path is not None:
        assert label_index_json_path is not None

        # verify that target names match the original "label names" from dataset
        with open(target_mapping_json_path, 'r') as f:
            target_names_map = json.load(f)
        target_names = set(target_names_map.keys())

        # if the dataset does not already have an 'other' category, then the
        # 'other' category must come last in label_names to avoid conflicting
        # with an existing label_id
        if target_names != set(label_names):
            assert target_names == set(label_names) | {'other'}
            label_names.append('other')

            with open(os.path.join(output_dir, 'label_index.json'), 'w') as f:
                json.dump(dict(enumerate(label_names)), f)

        with open(label_index_json_path, 'r') as f:
            idx_to_label = json.load(f)
        classifier_name_to_idx = {
            idx_to_label[str(k)]: k for k in range(len(idx_to_label))
        }

        target_cols_map = {}
        for i_target, label_name in enumerate(label_names):
            classifier_names = target_names_map[label_name]
            target_cols_map[i_target] = [
                classifier_name_to_idx[classifier_name]
                for classifier_name in classifier_names
            ]

    # define loss function (criterion)
    loss_fn: torch.nn.Module
    if params['multilabel']:
        loss_fn = torch.nn.BCEWithLogitsLoss(reduction='none').to(device)
    else:
        loss_fn = torch.nn.CrossEntropyLoss(reduction='none').to(device)

    split_metrics = {}
    split_label_stats = {}
    cms = {}
    for split in splits:
        print(f'Evaluating {split}...')
        df, metrics, cm = test_epoch(
            model, loaders[split], weighted=True, device=device,
            label_names=label_names, loss_fn=loss_fn,
            target_mapping=target_cols_map)

        # this file ends up being huge, so we GZIP compress it
        output_csv_path = os.path.join(output_dir, f'outputs_{split}.csv.gz')
        df.to_csv(output_csv_path, index=False, compression='gzip')

        split_metrics[split] = metrics
        cms[split] = cm
        split_label_stats[split] = calc_per_label_stats(cm, label_names)

        # double check that the accuracy metrics are computed properly
        preds = df[label_names].to_numpy().argmax(axis=1)
        preds = np.asarray(label_names)[preds]
        assert np.isclose(metrics['acc_top1'] / 100.,
                          sum(preds == df['label']) / len(df))
        assert np.isclose(metrics['acc_weighted_top1'] / 100.,
                          sum((preds == df['label']) * df['weight']) / len(df))

    metrics_df = pd.concat(split_metrics, names=['split']).unstack(level=1)
    metrics_df.to_csv(os.path.join(output_dir, 'overall_metrics.csv'))

    # save the confusion matrices to .npz
    npz_path = os.path.join(output_dir, 'confusion_matrices.npz')
    np.savez_compressed(npz_path, **cms)

    # save per-label statistics
    label_stats_df = pd.concat(
        split_label_stats, names=['split', 'label']).reset_index()
    label_stats_csv_path = os.path.join(output_dir, 'label_stats.csv')
    label_stats_df.to_csv(label_stats_csv_path, index=False)
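target_cols_map above associates each target column with one or more classifier output columns. A minimal sketch of applying such a map to a matrix of per-class probabilities (the sum aggregation is an assumption; test_epoch's actual handling is not shown here):

import numpy as np


def remap_probs(probs, target_cols_map):
    # probs: (num_samples, num_classifier_classes)
    # returns (num_samples, num_targets), summing grouped columns
    out = np.zeros((probs.shape[0], len(target_cols_map)), dtype=probs.dtype)
    for i_target, cols in target_cols_map.items():
        out[:, i_target] = probs[:, cols].sum(axis=1)
    return out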
Example #28
def main():
    parser = argparse.ArgumentParser(
        description=
        'Model training engine for molecular phenotype models from TCGA images'
    )

    parser.add_argument('--Task',
                        help='WGD (only implemented task)',
                        default='WGD',
                        required=False,
                        type=str)
    parser.add_argument('--GPU',
                        help='GPU device to use for model training',
                        required=True,
                        type=int)
    parser.add_argument('--n_workers',
                        help='Number of workers to use for dataloaders',
                        required=False,
                        default=12,
                        type=int)
    parser.add_argument('--lr',
                        help='Initial learning rate',
                        required=False,
                        default=1e-4,
                        type=float)
    parser.add_argument('--patience',
                        help='Patience for lr scheduler',
                        required=False,
                        default=10,
                        type=int)
    parser.add_argument('--model_name',
                        help='Path to place saved model state',
                        required=True,
                        type=str)
    parser.add_argument('--batch_size',
                        help='Batch size for training and validation loops',
                        required=False,
                        default=264,
                        type=int)
    parser.add_argument('--epochs',
                        help='Epochs to run training and validation loops',
                        required=False,
                        default=200,
                        type=int)
    parser.add_argument('--magnification',
                        help='Magnification level of tiles',
                        required=False,
                        default='10.0',
                        type=str)
    args = parser.parse_args()

    # https://github.com/pytorch/accimage
    set_image_backend('accimage')
    device = torch.device('cuda', args.GPU)

    root_dir = data_utils.root_dir_all

    # normalize and tensorify jpegs
    transform_train = train_utils.transform_train
    transform_val = train_utils.transform_validation

    # set up model
    input_size = 2048
    hidden_size = 512
    output_size = 1
    state_dict_file = '/n/tcga_models/archive/resnet18_WGD_10x.pt'
    resnet = models.resnet18(pretrained=False)
    # note: stock torchvision resnet18 exposes 512-dim features
    # (resnet.fc.in_features == 512); the hard-coded 2048 here has to match
    # whatever the saved checkpoint below was trained with
    resnet.fc = nn.Linear(2048, output_size, bias=True)
    saved_state = torch.load(state_dict_file,
                             map_location=lambda storage, loc: storage)
    resnet.load_state_dict(saved_state)

    for p in resnet.parameters():
        p.requires_grad = False

    attend_and_pool = model_utils.Attention(input_size, hidden_size,
                                            output_size)
    resnet.fc = attend_and_pool
    for p in resnet.fc.parameters():
        p.requires_grad = True
    resnet.cuda(device=device)

    optim = torch.optim.Adam(resnet.fc.parameters(), lr=args.lr)
    train_cancers = ['COAD']
    val_cancers = ['COAD']
    pickle_file = '/n/tcga_wgd_sa_all_1.0.pkl'
    batch_all, sa_trains, sa_vals = data_utils.load_COAD_train_val_sa_pickle(
        pickle_file=pickle_file, return_all_cancers=True, split_in_two=False)
    train_idxs = [batch_all.index(cancer) for cancer in train_cancers]
    val_idxs = [batch_all.index(cancer) for cancer in val_cancers]
    train_sets = []
    val_sets = []
    for i in range(len(train_cancers)):
        train_set = data_utils.TCGA_random_tiles_sampler(
            sa_trains[batch_all.index(train_cancers[i])],
            root_dir + train_cancers[i] + '/',
            transform=transform_train,
            magnification=args.magnification,
            tile_batch_size=args.batch_size)
        train_sets.append(train_set)

    for j in range(len(val_cancers)):
        val_set = data_utils.TCGA_random_tiles_sampler(
            sa_vals[batch_all.index(val_cancers[j])],
            root_dir + val_cancers[j] + '/',
            transform=transform_val,
            magnification=args.magnification,
            tile_batch_size=args.batch_size)
        val_sets.append(val_set)

    # note: only the last set built in the loops above is wired into the
    # loaders below; with the single-entry cancer lists this is equivalent
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=1,
                                               shuffle=True,
                                               num_workers=args.n_workers,
                                               pin_memory=False)

    val_loader = torch.utils.data.DataLoader(val_set,
                                             batch_size=1,
                                             shuffle=True,
                                             num_workers=args.n_workers,
                                             pin_memory=False)

    best_loss = 1e8
    best_acc = 0.0
    criterion = nn.BCEWithLogitsLoss()
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optim,
                                                           patience=8,
                                                           min_lr=1e-6)
    for e in range(args.epochs):
        if e % 5 == 0:
            print('---------- LR: {0:0.8f} ----------'.format(
                optim.state_dict()['param_groups'][0]['lr']))
        train_utils.training_loop_random_sampling(e,
                                                  train_loader,
                                                  device,
                                                  criterion,
                                                  resnet,
                                                  optim,
                                                  gradient_step_length=5,
                                                  reporting_step_length=10)
        val_loss, val_acc = train_utils.validation_loop_for_random_sampler(
            e, val_loader, device, criterion, resnet, scheduler)
        if val_loss < best_loss:
            torch.save(resnet.state_dict(), args.model_name)
            best_loss = val_loss
            best_acc = val_acc
            print('WROTE MODEL')
        elif val_acc > best_acc:
            torch.save(resnet.state_dict(), args.model_name)
            best_acc = val_acc
            best_loss = val_loss
            print('WROTE MODEL')
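training_loop_random_sampling is not shown, but gradient_step_length=5 suggests the optimizer steps once per five tile batches. A generic gradient-accumulation sketch under that assumption:

def train_with_accumulation(loader, model, criterion, optimizer, device,
                            step_length=5):
    model.train()
    optimizer.zero_grad()
    for i, (x, y) in enumerate(loader):
        x, y = x.to(device), y.to(device)
        loss = criterion(model(x), y) / step_length  # average over the window
        loss.backward()  # grads accumulate across iterations until step()
        if (i + 1) % step_length == 0:
            optimizer.step()
            optimizer.zero_grad()
    # note: a trailing partial window is discarded here for simplicity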
Example #29
        if not opt.no_train and opt.lr_scheduler == 'multistep':
            scheduler.step()
        elif not opt.no_train and opt.lr_scheduler == 'plateau':
            scheduler.step(prev_val_loss)

    if opt.inference:
        inference_loader, inference_class_names = get_inference_utils(opt)
        inference_result_path = opt.result_path / '{}.json'.format(
            opt.inference_subset)

        inference.inference(inference_loader, model, inference_result_path,
                            inference_class_names, opt.inference_no_average,
                            opt.output_topk)


if __name__ == '__main__':
    opt = get_opt()

    opt.device = torch.device('cpu' if opt.no_cuda else 'cuda')
    if not opt.no_cuda:
        cudnn.benchmark = True
    if opt.accimage:
        torchvision.set_image_backend('accimage')

    opt.ngpus_per_node = torch.cuda.device_count()
    if opt.distributed:
        opt.world_size = opt.ngpus_per_node * opt.world_size
        mp.spawn(main_worker, nprocs=opt.ngpus_per_node, args=(opt, ))
    else:
        main_worker(-1, opt)
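mp.spawn calls its target with the process index as the first positional argument, followed by the unpacked args tuple, which is why the direct call above passes -1 in that slot. A minimal sketch of a compatible worker (the opt attribute names are assumptions):

import torch.distributed as dist


def main_worker(index, opt):
    # index: process rank from mp.spawn, or -1 on the direct call path
    if opt.distributed:
        dist.init_process_group(backend='nccl',
                                init_method=opt.dist_url,
                                world_size=opt.world_size,
                                rank=index)
    # ... build the model and loaders, then run train/val for this rank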
Example #30
import os
import logging

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset
import torchvision
import torchvision.transforms as transforms
from pycocotools.coco import COCO

from vocab import BasicTokenizer

import accimage
torchvision.set_image_backend("accimage")
from PIL import Image


class CocoDataset(Dataset):
    """
    Args:
        root_path:      path to COCO dataset
        mode:           train or val
        tokenizer:      tokenizer used to encode captions (a BasicTokenizer)
        transform:      augmentation on images, likely a transforms.Compose object
    """
    def __init__(self, root_path: str, mode: str, tokenizer, transform=None):
        assert mode in ("train", "val")
        assert isinstance(tokenizer, BasicTokenizer)
        self.img_path = os.path.join(root_path, "{}2017".format(mode))
        self.coco = COCO(
Example #31
def get_default_image_loader():
    # unlike variants that fall back to PIL, this one assumes accimage
    # is installed; decoding will fail at load time otherwise
    torchvision.set_image_backend('accimage')
    return accimage_loader
Example #32
def main():
    global args, best_prec1
    args = parser.parse_args()

    # create model
    if args.arch == 'alexnet':
        model = model_list.alexnet(pretrained=args.pretrained)
        input_size = 227
    else:
        raise Exception('Model not supported yet')

    if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    else:
        model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    optimizer = torch.optim.Adam(model.parameters(), args.lr,
                                weight_decay=args.weight_decay)

    for m in model.modules():
        if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
            c = float(m.weight.data[0].nelement())
            m.weight.data = m.weight.data.normal_(0, 2.0/c)
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data = m.weight.data.zero_().add(1.0)
            m.bias.data = m.bias.data.zero_()

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
            del checkpoint
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code

    if args.caffe_data:
        print('==> Using Caffe Dataset')
        cwd = os.getcwd()
        sys.path.append(cwd+'/../')
        import datasets as datasets
        import datasets.transforms as transforms
        if not os.path.exists(args.data+'/imagenet_mean.binaryproto'):
            print("==> Data directory"+args.data+"does not exits")
            print("==> Please specify the correct data path by")
            print("==>     --data <DATA_PATH>")
            return

        normalize = transforms.Normalize(
                meanfile=args.data+'/imagenet_mean.binaryproto')


        train_dataset = datasets.ImageFolder(
            args.data,
            transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
                transforms.RandomSizedCrop(input_size),
            ]),
            Train=True)

        train_sampler = None

        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=args.batch_size, shuffle=False,
            num_workers=args.workers, pin_memory=True, sampler=train_sampler)

        val_loader = torch.utils.data.DataLoader(
            datasets.ImageFolder(args.data, transforms.Compose([
                transforms.ToTensor(),
                normalize,
                transforms.CenterCrop(input_size),
            ]),
            Train=False),
            batch_size=args.batch_size, shuffle=False,
            num_workers=args.workers, pin_memory=True)
    else:
        print('==> Using PyTorch Dataset')
        import torchvision
        import torchvision.transforms as transforms
        import torchvision.datasets as datasets
        traindir = os.path.join(args.data, 'train')
        valdir = os.path.join(args.data, 'val')
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                std=[1./255., 1./255., 1./255.])  # 1/255 std rescales the [0, 1] tensor by 255 after mean subtraction

        torchvision.set_image_backend('accimage')

        train_dataset = datasets.ImageFolder(
                traindir,
                transforms.Compose([
                    transforms.Resize((256, 256)),
                    transforms.RandomCrop(input_size),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    normalize,
                    ]))

        train_loader = torch.utils.data.DataLoader(
                train_dataset, batch_size=args.batch_size, shuffle=True,
                num_workers=args.workers, pin_memory=True)
        val_loader = torch.utils.data.DataLoader(
                datasets.ImageFolder(valdir, transforms.Compose([
                    transforms.Resize((256, 256)),
                    transforms.CenterCrop(input_size),
                    transforms.ToTensor(),
                    normalize,
                    ])),
                batch_size=args.batch_size, shuffle=False,
                num_workers=args.workers, pin_memory=True)

    print(model)

    # define the binarization operator
    global bin_op
    bin_op = util.BinOp(model)

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'optimizer' : optimizer.state_dict(),
        }, is_best)
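Both AlexNet examples call adjust_learning_rate(optimizer, epoch) without defining it. The classic ImageNet-example pattern is a step decay; a sketch under that assumption (base rate and decay period are placeholders):

def adjust_learning_rate(optimizer, epoch, base_lr=0.1, decay_every=30):
    # step decay: divide the learning rate by 10 every `decay_every` epochs
    lr = base_lr * (0.1 ** (epoch // decay_every))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr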