Example 1
0
 def _create_dataset(self):
     """Build the (train, test) pair for the domain named ``self.name``.

     Built-in datasets come with their own split; directory-parsed
     datasets (Office / VisDA) have none, so a deep copy of the training
     set doubles as the test set.
     """
     name = self.name
     seed = self.seed
     # Domains served straight from an image directory.
     directory_roots = {
         'amazon': './data/Office/amazon/images',
         'dslr': './data/Office/dslr/images',
         'webcam': './data/Office/webcam/images',
         'visda_train': './data/VisDA/train',
         'visda_validation': './data/VisDA/validation',
     }
     if name == 'mnist':
         train, test = chainer.datasets.get_mnist(ndim=3, rgb_format=True)
     elif name == 'usps1800':
         train, test = get_usps1800(seed=seed)
     elif name == 'mnist2000':
         train, test = get_mnist2000(seed=seed)
     elif name == 'svhn':
         train, test = chainer.datasets.get_svhn()
     elif name == 'mnistm':
         train, test = get_mnistm()
     elif name in directory_roots:
         train = DirectoryParsingLabelDataset(directory_roots[name])
         test = copy.deepcopy(train)
     else:
         sys.exit("The domain name {} is wrong.".format(self.name))
     return train, test
    def test_directory_parsing_label_dataset(self):
        """Check length, label names and path ordering of the dataset."""
        dataset = DirectoryParsingLabelDataset(
            self.tmp_dir, color=self.color)

        # Expected example count depends on the directory nesting depth.
        if self.depth == 1:
            expected_len = self.n_img_per_class * self.n_class
        elif self.depth == 2:
            expected_len = (self.n_img_per_class
                            * self.n_sub_directory * self.n_class)
        self.assertEqual(len(dataset), expected_len)

        assert_is_label_dataset(dataset, self.n_class, color=self.color)

        label_names = directory_parsing_label_names(self.tmp_dir)
        self.assertEqual(
            label_names, ['class_{}'.format(i) for i in range(self.n_class)])

        # Paths must be enumerated class-by-class in lexicographic order.
        if self.depth == 1:
            expected_paths = [
                '{}/class_{}/img{}.{}'.format(self.tmp_dir, i, j, self.suffix)
                for i in range(self.n_class)
                for j in range(self.n_img_per_class)]
            self.assertEqual(dataset.img_paths, expected_paths)
        elif self.depth == 2:
            expected_paths = [
                '{}/class_{}/nested_{}/img{}.{}'.format(
                    self.tmp_dir, i, j, k, self.suffix)
                for i in range(self.n_class)
                for j in range(self.n_sub_directory)
                for k in range(self.n_img_per_class)]
            self.assertEqual(dataset.img_paths, expected_paths)
 def __init__(self,
              root=os.path.join("~", ".chainer", "datasets", "imagenet"),
              mode="train",
              transform=None):
     """Wrap one split of an ImageNet-style directory as a dataset.

     Any ``mode`` other than ``"train"`` selects the ``val`` split.
     """
     # NOTE(review): "~" is not expanded here; the literal path is
     # handed to DirectoryParsingLabelDataset — confirm callers expand it.
     subdir = "train" if mode == "train" else "val"
     self.transform = transform
     self.base = DirectoryParsingLabelDataset(os.path.join(root, subdir))
    def test_numerical_sort(self):
        """Label names sort numerically when ``numerical_sort=True``."""
        dataset = DirectoryParsingLabelDataset(
            self.tmp_dir, numerical_sort=True)

        assert_is_label_dataset(dataset, self.n_class)

        label_names = directory_parsing_label_names(
            self.tmp_dir, numerical_sort=True)
        expected = [str(i) for i in range(self.n_class)]
        self.assertEqual(label_names, expected)
Example 5
0
def tuple2array(url):
    """Collect every image array from the directory dataset at *url*.

    Returns a list holding the first element (the image) of each example,
    in dataset order.
    """
    dataset = DirectoryParsingLabelDataset(url)
    # The original pre-filled the list with throwaway np.arange buffers
    # and then overwrote every slot; build the result directly instead.
    return [dataset[i][0] for i in range(len(dataset))]
Example 6
0
 def __init__(self,
              root,
              scale_size=256,
              crop_size=224,
              mean=(0.485, 0.456, 0.406),
              std=(0.229, 0.224, 0.225)):
     """Record resize/crop/normalization parameters for *root*'s images."""
     self.base = DirectoryParsingLabelDataset(root)
     self.scale_size = scale_size
     # A single int means a square crop.
     self.crop_size = ((crop_size, crop_size)
                       if isinstance(crop_size, int) else crop_size)
     # Shape (C, 1, 1) so the statistics broadcast over CHW images.
     self.mean = np.asarray(mean, dtype=np.float32).reshape(-1, 1, 1)
     self.std = np.asarray(std, dtype=np.float32).reshape(-1, 1, 1)
Example 7
0
def main():
    """Evaluate a pretrained classifier on an ILSVRC-style val directory.

    Streams predictions over the validation set and prints the top-1
    error.
    """
    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('val', help='Path to root of the validation dataset')
    # BUG FIX: without a model name no extractor can be built and the
    # script crashed later with NameError; require the argument instead.
    parser.add_argument(
        '--model', choices=('vgg16', 'resnet50', 'resnet101', 'resnet152'),
        required=True)
    parser.add_argument('--pretrained_model', default='imagenet')
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--crop', choices=('center', '10'), default='center')
    parser.add_argument('--resnet_mode', default='he')
    args = parser.parse_args()

    dataset = DirectoryParsingLabelDataset(args.val)
    label_names = directory_parsing_label_names(args.val)
    n_class = len(label_names)
    iterator = iterators.MultiprocessIterator(
        dataset, args.batchsize, repeat=False, shuffle=False,
        n_processes=6, shared_mem=300000000)

    if args.model == 'vgg16':
        extractor = VGG16(n_class, args.pretrained_model)
    elif args.model == 'resnet50':
        extractor = ResNet50(
            n_class, args.pretrained_model, mode=args.resnet_mode)
    elif args.model == 'resnet101':
        extractor = ResNet101(
            n_class, args.pretrained_model, mode=args.resnet_mode)
    elif args.model == 'resnet152':
        extractor = ResNet152(
            n_class, args.pretrained_model, mode=args.resnet_mode)
    model = FeaturePredictor(
        extractor, crop_size=224, scale_size=256, crop=args.crop)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    print('Model has been prepared. Evaluation starts.')
    # The raw input images are not needed for scoring; free them eagerly.
    in_values, out_values, rest_values = apply_to_iterator(
        model.predict, iterator, hook=ProgressHook(len(dataset)))
    del in_values

    pred_probs, = out_values
    gt_labels, = rest_values

    accuracy = F.accuracy(
        np.array(list(pred_probs)), np.array(list(gt_labels))).data
    print()
    print('Top 1 Error {}'.format(1. - accuracy))
Example 8
0
def get_val_data_iterator(data_dir, batch_size, num_workers, num_classes):
    """Build a non-repeating iterator over ``<data_dir>/val``.

    Returns the iterator together with the dataset length.
    """
    val_dir = os.path.join(data_dir, 'val')
    dataset = DirectoryParsingLabelDataset(val_dir)
    # The directory layout must define exactly num_classes labels.
    assert (len(directory_parsing_label_names(val_dir)) == num_classes)

    iterator = iterators.MultiprocessIterator(dataset=dataset,
                                              batch_size=batch_size,
                                              repeat=False,
                                              shuffle=False,
                                              n_processes=num_workers,
                                              shared_mem=300000000)

    return iterator, len(dataset)
 def __init__(self,
              root=os.path.join("~", ".chainer", "datasets", "imagenet"),
              mode="train",
              scale_size=256,
              crop_size=224,
              mean=(0.485, 0.456, 0.406),
              std=(0.229, 0.224, 0.225)):
     """ImageNet-style split wrapper with resize/crop/normalize settings.

     Any ``mode`` other than ``"train"`` selects the ``val`` split.
     """
     subdir = "val"
     if mode == "train":
         subdir = "train"
     self.base = DirectoryParsingLabelDataset(os.path.join(root, subdir))
     self.scale_size = scale_size
     # A single int means a square crop.
     if isinstance(crop_size, int):
         crop_size = (crop_size, crop_size)
     self.crop_size = crop_size
     # Shape (C, 1, 1) so the statistics broadcast over CHW images.
     self.mean = np.asarray(mean, dtype=np.float32).reshape(-1, 1, 1)
     self.std = np.asarray(std, dtype=np.float32).reshape(-1, 1, 1)
def main():
    """Evaluate a saved classifier and write a confusion matrix.

    Loads the dataset rooted at ``args.dataset``, reuses a cached channel
    mean (``mean.npy``) when present, restores weights from
    ``args.load_npz`` and saves a confusion matrix into ``args.save_dir``.
    """
    args = parser()
    save_dir = Path(args.save_dir)
    save_dir.mkdir(exist_ok=True, parents=True)
    root = args.dataset
    dataset = DirectoryParsingLabelDataset(root)
    mean_path = root + '/mean.npy'
    if os.path.exists(mean_path):
        mean = np.load(mean_path)
    else:
        # BUG FIX: the original passed the undefined name ``datasets``,
        # raising NameError whenever mean.npy was missing.
        mean = compute_mean(dataset, root)
        np.save(mean_path, mean)
    use_mean = args.use_mean
    print('use mean flag is ', use_mean)
    if not use_mean:
        print('not using mean')

    # img_paths/labels are already sequences; no per-element loop needed.
    X = np.array(dataset.img_paths)
    y = np.array(dataset.labels)

    test_data = LabeledImageDataset(list(zip(X, y)))
    test = chainer.datasets.TransformDataset(
        test_data,
        partial(_transform2, mean=mean, train=False, mean_flag=args.use_mean))
    class_num = len(set(dataset.labels))
    model = L.Classifier(archs[args.arch](output=class_num)).to_gpu()

    serializers.load_npz(args.load_npz, model)

    # Class labels are the sub-directory names under the dataset root,
    # excluding the cached mean file.
    labels_list = [Path(d).name for d in glob.glob('{}/*'.format(root))]
    if 'mean.npy' in labels_list:
        labels_list.remove('mean.npy')
    confusion_matrix_cocoa(test, args.gpu, class_num, model, save_dir, 1,
                           labels_list)
Example 11
0
def setup(dataset, model, pretrained_model, batchsize, val, crop, resnet_arch):
    """Prepare (dataset, eval function, model, batchsize) for evaluation.

    ``dataset`` is the dataset name (currently only ``'imagenet'``) and
    ``val`` the path to its validation directory; the remaining
    parameters mirror the CLI flags and may be None to use per-model
    defaults from the ``models`` registry.

    Raises:
        ValueError: if ``dataset`` names an unsupported dataset.
    """
    dataset_name = dataset
    if dataset_name == 'imagenet':
        dataset = DirectoryParsingLabelDataset(val)
        label_names = directory_parsing_label_names(val)
    else:
        # BUG FIX: an unknown name previously fell through and raised
        # NameError on ``label_names`` below; fail fast instead.
        raise ValueError(
            'dataset {} is not supported'.format(dataset_name))

    def eval_(out_values, rest_values):
        # Unpack the single prediction / ground-truth streams and print
        # the top-1 error over the whole set.
        pred_probs, = out_values
        gt_labels, = rest_values

        accuracy = F.accuracy(np.array(list(pred_probs)),
                              np.array(list(gt_labels))).data
        print()
        print('Top 1 Error {}'.format(1. - accuracy))

    # Registry entry: model class, per-dataset pretrained weights,
    # default batch size (and optionally default crop / resnet arch).
    cls, pretrained_models, default_batchsize = models[model][:3]
    if pretrained_model is None:
        pretrained_model = pretrained_models.get(dataset_name, dataset_name)
    if crop is None:
        crop = models[model][3]
    kwargs = {
        'n_class': len(label_names),
        'pretrained_model': pretrained_model,
    }
    if model in ['resnet50', 'resnet101', 'resnet152']:
        if resnet_arch is None:
            resnet_arch = models[model][4]
        kwargs.update({'arch': resnet_arch})
    extractor = cls(**kwargs)
    model = FeaturePredictor(extractor,
                             crop_size=224,
                             scale_size=256,
                             crop=crop)

    if batchsize is None:
        batchsize = default_batchsize

    return dataset, eval_, model, batchsize
Example 12
0
def main():
    """Distributed ImageNet training with ChainerMN.

    Trains a ResNet-50/101/152 classifier on a directory-parsed
    ILSVRC2012 dataset using the linear LR scaling rule, a 5-epoch
    warmup and step decay at epochs 30/60/80.
    """
    # Backbone registry: model class, the layer whose output is the
    # classification score, and extra constructor kwargs.
    model_cfgs = {
        'resnet50': {
            'class': ResNet50,
            'score_layer_name': 'fc6',
            'kwargs': {
                'arch': 'fb'
            }
        },
        'resnet101': {
            'class': ResNet101,
            'score_layer_name': 'fc6',
            'kwargs': {
                'arch': 'fb'
            }
        },
        'resnet152': {
            'class': ResNet152,
            'score_layer_name': 'fc6',
            'kwargs': {
                'arch': 'fb'
            }
        }
    }
    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to root of the train dataset')
    parser.add_argument('val', help='Path to root of the validation dataset')
    parser.add_argument('--model',
                        '-m',
                        choices=model_cfgs.keys(),
                        default='resnet50',
                        help='Convnet models')
    parser.add_argument('--communicator',
                        type=str,
                        default='pure_nccl',
                        help='Type of communicator')
    parser.add_argument('--loaderjob', type=int, default=4)
    parser.add_argument('--batchsize',
                        type=int,
                        default=32,
                        help='Batch size for each worker')
    parser.add_argument('--lr', type=float)
    parser.add_argument('--momentum', type=float, default=0.9)
    parser.add_argument('--weight_decay', type=float, default=0.0001)
    parser.add_argument('--out', type=str, default='result')
    parser.add_argument('--epoch', type=int, default=90)
    args = parser.parse_args()

    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    # Start and join a dummy process so later MultiprocessIterator
    # workers are created via the 'forkserver' start method.
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    comm = chainermn.create_communicator(args.communicator)
    device = comm.intra_rank  # used as the GPU id below

    if args.lr is not None:
        lr = args.lr
    else:
        # Linear scaling rule: base lr 0.1 scaled by global batch / 256.
        lr = 0.1 * (args.batchsize * comm.size) / 256
        if comm.rank == 0:
            print('lr={}: lr is selected based on the linear '
                  'scaling rule'.format(lr))

    label_names = directory_parsing_label_names(args.train)

    model_cfg = model_cfgs[args.model]
    extractor = model_cfg['class'](n_class=len(label_names),
                                   **model_cfg['kwargs'])
    extractor.pick = model_cfg['score_layer_name']
    model = Classifier(extractor)
    # Following https://arxiv.org/pdf/1706.02677.pdf,
    # the gamma of the last BN of each resblock is initialized by zeros.
    for l in model.links():
        if isinstance(l, Bottleneck):
            l.conv3.bn.gamma.data[:] = 0

    train_data = DirectoryParsingLabelDataset(args.train)
    val_data = DirectoryParsingLabelDataset(args.val)
    train_data = TransformDataset(train_data, ('img', 'label'),
                                  TrainTransform(extractor.mean))
    val_data = TransformDataset(val_data, ('img', 'label'),
                                ValTransform(extractor.mean))
    print('finished loading dataset')

    # Rank 0 enumerates all example indices; scatter_dataset then
    # shuffles and partitions them across the workers.
    if comm.rank == 0:
        train_indices = np.arange(len(train_data))
        val_indices = np.arange(len(val_data))
    else:
        train_indices = None
        val_indices = None

    train_indices = chainermn.scatter_dataset(train_indices,
                                              comm,
                                              shuffle=True)
    val_indices = chainermn.scatter_dataset(val_indices, comm, shuffle=True)
    train_data = train_data.slice[train_indices]
    val_data = val_data.slice[val_indices]
    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, args.batchsize, n_processes=args.loaderjob)
    val_iter = iterators.MultiprocessIterator(val_data,
                                              args.batchsize,
                                              repeat=False,
                                              shuffle=False,
                                              n_processes=args.loaderjob)

    optimizer = chainermn.create_multi_node_optimizer(
        CorrectedMomentumSGD(lr=lr, momentum=args.momentum), comm)
    optimizer.setup(model)
    # Weight decay is not applied to BN beta/gamma parameters.
    for param in model.params():
        if param.name not in ('beta', 'gamma'):
            param.update_rule.add_hook(WeightDecay(args.weight_decay))

    if device >= 0:
        chainer.cuda.get_device(device).use()
        model.to_gpu()

    updater = chainer.training.StandardUpdater(train_iter,
                                               optimizer,
                                               device=device)

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    @make_shift('lr')
    def warmup_and_exponential_shift(trainer):
        # Warm up towards the scaled lr over 5 epochs, then decay 10x
        # at epochs 30, 60 and 80.
        epoch = trainer.updater.epoch_detail
        warmup_epoch = 5
        if epoch < warmup_epoch:
            if lr > 0.1:
                warmup_rate = 0.1 / lr
                rate = warmup_rate \
                    + (1 - warmup_rate) * epoch / warmup_epoch
            else:
                rate = 1
        elif epoch < 30:
            rate = 1
        elif epoch < 60:
            rate = 0.1
        elif epoch < 80:
            rate = 0.01
        else:
            rate = 0.001
        return rate * lr

    trainer.extend(warmup_and_exponential_shift)
    evaluator = chainermn.create_multi_node_evaluator(
        extensions.Evaluator(val_iter, model, device=device), comm)
    trainer.extend(evaluator, trigger=(1, 'epoch'))

    log_interval = 0.1, 'epoch'
    print_interval = 0.1, 'epoch'

    # Logging / snapshot extensions run on rank 0 only.
    if comm.rank == 0:
        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)
        trainer.extend(extensions.snapshot_object(
            extractor, 'snapshot_model_{.updater.epoch}.npz'),
                       trigger=(args.epoch, 'epoch'))
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.PrintReport([
            'iteration', 'epoch', 'elapsed_time', 'lr', 'main/loss',
            'validation/main/loss', 'main/accuracy', 'validation/main/accuracy'
        ]),
                       trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
Example 13
0
def main():
    """Single-device ImageNet training for the DetNAS-small backbone.

    NOTE(review): --batchsize, --lr, --momentum, --weight_decay and
    --epoch define no defaults, so omitting any of them passes None to
    the iterator/optimizer/trainer — confirm they are always supplied.
    """
    # Single supported backbone; kwargs left empty (n_class is passed
    # explicitly below).
    model_cfgs = {
        'detnas_small_coco': {
            'class': DetNASSmallCOCO,
            'score_layer_name': 'fc',
            'kwargs': {
                #'n_class': 1000
            }
        },
    }
    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to root of the train dataset')
    parser.add_argument('val', help='Path to root of the validation dataset')
    parser.add_argument('--trial', action='store_true')
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument(
        '--model',
        '-m',
        choices=model_cfgs.keys(),
        default='detnas_small_coco',
        help='Convnet models')
    parser.add_argument('--loaderjob', type=int, default=4)
    parser.add_argument(
        '--batchsize', type=int, help='Batch size for each worker')
    parser.add_argument('--lr', type=float)
    parser.add_argument('--momentum', type=float)
    parser.add_argument('--weight_decay', type=float)
    parser.add_argument('--out', type=str, default='result')
    parser.add_argument('--epoch', type=int)
    args = parser.parse_args()

    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    # Start and join a dummy process so later MultiprocessIterator
    # workers are created via the 'forkserver' start method.
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    label_names = directory_parsing_label_names(args.train)

    model_cfg = model_cfgs[args.model]
    extractor = model_cfg['class'](
        n_class=len(label_names), **model_cfg['kwargs'])
    extractor.pick = model_cfg['score_layer_name']
    model = Classifier(extractor)

    train_data = DirectoryParsingLabelDataset(args.train)
    val_data = DirectoryParsingLabelDataset(args.val)
    train_data = TransformDataset(train_data, TrainTransform(extractor.mean))
    val_data = TransformDataset(val_data, ValTransform(extractor.mean))
    print('finished loading dataset')

    # --trial keeps only 1/100th of the training indices for a dry run.
    train_indices = np.arange(len(train_data)//(100 if args.trial else 1))
    val_indices = np.arange(len(val_data))


    # NOTE(review): the slicing below is disabled, so the index arrays
    # above only affect the LinearShift schedule length — confirm this
    # is intended.
    """
    train_data = train_data.slice[train_indices]
    val_data = val_data.slice[val_indices]
    """
    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, args.batchsize, n_processes=args.loaderjob)
    val_iter = iterators.MultiprocessIterator(
        val_data,
        args.batchsize,
        repeat=False,
        shuffle=False,
        n_processes=args.loaderjob)

    optimizer = CorrectedMomentumSGD(lr=args.lr, momentum=args.momentum)
    optimizer.setup(model)
    # Weight decay is not applied to BN beta/gamma parameters.
    for param in model.params():
        if param.name not in ('beta', 'gamma'):
            param.update_rule.add_hook(WeightDecay(args.weight_decay))

    if args.gpu != -1:
        model.to_gpu(args.gpu)

    updater = chainer.training.StandardUpdater(
        train_iter, optimizer, device=args.gpu)

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Linearly decay lr from args.lr to 0 over the first
    # len(train_indices) / batchsize iterations.
    trainer.extend(LinearShift('lr', (args.lr, 0.0),
                   (0, len(train_indices) / args.batchsize)))
    evaluator = extensions.Evaluator(val_iter, model)
    trainer.extend(evaluator, trigger=(1, 'epoch'))

    log_interval = 0.1, 'epoch'
    print_interval = 0.1, 'epoch'

    trainer.extend(
        chainer.training.extensions.observe_lr(), trigger=log_interval)
    # Snapshot only the feature extractor at the end of training.
    trainer.extend(
        extensions.snapshot_object(extractor,
                                   'snapshot_model_{.updater.epoch}.npz'),
        trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(
        extensions.PrintReport([
            'iteration', 'epoch', 'elapsed_time', 'lr', 'main/loss',
            'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy'
        ]),
        trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
Example 14
0
def main():
    """ChainerMN ImageNet training with optional chainer-compiler paths.

    Besides standard distributed training, supports exporting the
    extractor to ONNX (--export), training a compiled ONNX model
    (--compile), and rewriting the batch size of a previously exported
    graph (--overwrite_batchsize).
    """
    # Backbone registry: model class, score layer to pick, ctor kwargs.
    model_cfgs = {
        'resnet50': {'class': ResNet50, 'score_layer_name': 'fc6',
                     'kwargs': {'arch': 'fb'}},
        'resnet101': {'class': ResNet101, 'score_layer_name': 'fc6',
                      'kwargs': {'arch': 'fb'}},
        'resnet152': {'class': ResNet152, 'score_layer_name': 'fc6',
                      'kwargs': {'arch': 'fb'}}
    }
    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to root of the train dataset')
    parser.add_argument('val', help='Path to root of the validation dataset')

    parser.add_argument('--export', type=str, default=None,
                        help='Export the model to ONNX')
    parser.add_argument('--compile', type=str, default=None,
                        help='Compile the model')
    parser.add_argument('--computation_order', type=str, default=None,
                        help='Computation order in backpropagation')

    parser.add_argument('--model',
                        '-m', choices=model_cfgs.keys(), default='resnet50',
                        help='Convnet models')
    parser.add_argument('--communicator', type=str,
                        default='pure_nccl', help='Type of communicator')
    parser.add_argument('--loaderjob', type=int, default=4)
    parser.add_argument('--batchsize', type=int, default=32,
                        help='Batch size for each worker')
    parser.add_argument('--lr', type=float)
    parser.add_argument('--momentum', type=float, default=0.9)
    parser.add_argument('--weight-decay', type=float, default=0.0001)
    parser.add_argument('--out', type=str, default='result')
    parser.add_argument('--epoch', type=int, default=90)
    parser.add_argument('--iterations', '-I', type=int, default=None,
                        help='Number of iterations to train')
    parser.add_argument('--no_use_fixed_batch_dataset',
                        dest='use_fixed_batch_dataset',
                        action='store_false',
                        help='Disable the use of FixedBatchDataset')
    parser.add_argument('--compiler-log', action='store_true',
                        help='Enables compile-time logging')
    parser.add_argument('--trace', action='store_true',
                        help='Enables runtime tracing')
    parser.add_argument('--verbose', action='store_true',
                        help='Enables runtime verbose log')
    parser.add_argument('--skip_runtime_type_check', action='store_true',
                        help='Skip runtime type check')
    parser.add_argument('--dump_memory_usage', type=int, default=0,
                        help='Dump memory usage (0-2)')
    parser.add_argument('--quiet_period', type=int, default=0,
                        help='Quiet period after runtime report')
    parser.add_argument('--overwrite_batchsize', action='store_true',
                        help='Overwrite batch size')
    args = parser.parse_args()

    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    # Start and join a dummy process so later MultiprocessIterator
    # workers are created via the 'forkserver' start method.
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    comm = chainermn.create_communicator(args.communicator)
    device = comm.intra_rank  # used as the GPU id below

    if args.lr is not None:
        lr = args.lr
    else:
        # Linear scaling rule: base lr 0.1 scaled by global batch / 256.
        lr = 0.1 * (args.batchsize * comm.size) / 256
        if comm.rank == 0:
            print('lr={}: lr is selected based on the linear '
                  'scaling rule'.format(lr))

    label_names = directory_parsing_label_names(args.train)

    model_cfg = model_cfgs[args.model]
    extractor = model_cfg['class'](
        n_class=len(label_names), **model_cfg['kwargs'])
    extractor.pick = model_cfg['score_layer_name']

    # Following https://arxiv.org/pdf/1706.02677.pdf,
    # the gamma of the last BN of each resblock is initialized by zeros.
    for l in extractor.links():
        if isinstance(l, Bottleneck):
            l.conv3.bn.gamma.data[:] = 0

    # --export: dump the extractor as ONNX and exit without training.
    if args.export is not None:
        chainer_compiler.use_unified_memory_allocator()
        extractor.to_device(device)
        x = extractor.xp.zeros((args.batchsize, 3, 224, 224)).astype('f')
        chainer_compiler.export(extractor, [x], args.export)
        return

    if args.compile is not None:
        print('run compiled model')
        chainer_compiler.use_chainerx_shared_allocator()
        extractor.to_device(device)
        # init params
        with chainer.using_config('enable_backprop', False),\
                chainer.using_config('train', False):
            x = extractor.xp.zeros((1, 3, 224, 224)).astype('f')
            extractor(x)

        # Translate CLI flags into compiler / runtime options.
        compiler_kwargs = {}
        if args.compiler_log:
            compiler_kwargs['compiler_log'] = True
        runtime_kwargs = {}
        if args.trace:
            runtime_kwargs['trace'] = True
        if args.verbose:
            runtime_kwargs['verbose'] = True
        if args.skip_runtime_type_check:
            runtime_kwargs['check_types'] = False
        if args.dump_memory_usage >= 1:
            runtime_kwargs['dump_memory_usage'] = args.dump_memory_usage
            # Record the GPU memory already in use as the baseline.
            free, total = cupy.cuda.runtime.memGetInfo()
            used = total - free
            runtime_kwargs['base_memory_usage'] = used

        onnx_filename = args.compile
        if args.overwrite_batchsize:
            # Rewrite the ONNX input shape to match --batchsize before
            # compiling.
            new_onnx_filename = ('/tmp/overwrite_batchsize_' +
                                 os.path.basename(onnx_filename))
            new_input_types = [
                input_rewriter.Type(shape=(args.batchsize, 3, 224, 224))
            ]
            input_rewriter.rewrite_onnx_file(onnx_filename,
                                             new_onnx_filename,
                                             new_input_types)
            onnx_filename = new_onnx_filename

        extractor_cc = chainer_compiler.compile_onnx(
            extractor,
            onnx_filename,
            'onnx_chainer',
            computation_order=args.computation_order,
            compiler_kwargs=compiler_kwargs,
            runtime_kwargs=runtime_kwargs,
            quiet_period=args.quiet_period)
        model = Classifier(extractor_cc)
    else:
        print('run vanilla chainer model')
        model = Classifier(extractor)

    train_data = DirectoryParsingLabelDataset(args.train)
    val_data = DirectoryParsingLabelDataset(args.val)
    train_data = TransformDataset(
        train_data, ('img', 'label'), TrainTransform(extractor.mean))
    val_data = TransformDataset(
        val_data, ('img', 'label'), ValTransform(extractor.mean))
    print('finished loading dataset')

    # Rank 0 enumerates all example indices; scatter_dataset then
    # shuffles and partitions them across the workers.
    if comm.rank == 0:
        train_indices = np.arange(len(train_data))
        val_indices = np.arange(len(val_data))
    else:
        train_indices = None
        val_indices = None

    train_indices = chainermn.scatter_dataset(
        train_indices, comm, shuffle=True)
    val_indices = chainermn.scatter_dataset(val_indices, comm, shuffle=True)
    train_data = train_data.slice[train_indices]
    val_data = val_data.slice[val_indices]
    if args.use_fixed_batch_dataset:
        # Presumably pads so every batch has exactly args.batchsize
        # examples, as a fixed-shape compiled graph would need — confirm.
        train_data = FixedBatchDataset(train_data, args.batchsize)
        val_data = FixedBatchDataset(val_data, args.batchsize)
    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, args.batchsize, n_processes=args.loaderjob)
    val_iter = iterators.MultiprocessIterator(
        val_data, args.batchsize,
        repeat=False, shuffle=False, n_processes=args.loaderjob)

    optimizer = chainermn.create_multi_node_optimizer(
        CorrectedMomentumSGD(lr=lr, momentum=args.momentum), comm)
    optimizer.setup(model)
    # Weight decay is not applied to BN beta/gamma parameters.
    for param in model.params():
        if param.name not in ('beta', 'gamma'):
            param.update_rule.add_hook(WeightDecay(args.weight_decay))

    if device >= 0:
        chainer.cuda.get_device(device).use()
        model.to_gpu()

    updater = chainer.training.StandardUpdater(
        train_iter, optimizer, device=device)

    # Stop by iteration count when --iterations is given, else by epoch.
    if args.iterations:
        stop_trigger = (args.iterations, 'iteration')
    else:
        stop_trigger = (args.epoch, 'epoch')
    trainer = training.Trainer(
        updater, stop_trigger, out=args.out)

    @make_shift('lr')
    def warmup_and_exponential_shift(trainer):
        # Warm up towards the scaled lr over 5 epochs, then decay 10x
        # at epochs 30, 60 and 80.
        epoch = trainer.updater.epoch_detail
        warmup_epoch = 5
        if epoch < warmup_epoch:
            if lr > 0.1:
                warmup_rate = 0.1 / lr
                rate = warmup_rate \
                    + (1 - warmup_rate) * epoch / warmup_epoch
            else:
                rate = 1
        elif epoch < 30:
            rate = 1
        elif epoch < 60:
            rate = 0.1
        elif epoch < 80:
            rate = 0.01
        else:
            rate = 0.001
        return rate * lr

    trainer.extend(warmup_and_exponential_shift)
    evaluator = chainermn.create_multi_node_evaluator(
        extensions.Evaluator(val_iter, model, device=device), comm)
    trainer.extend(evaluator, trigger=(1, 'epoch'))

    log_interval = 0.1, 'epoch'
    print_interval = 0.1, 'epoch'

    # Logging / snapshot extensions run on rank 0 only.
    if comm.rank == 0:
        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)
        trainer.extend(
            extensions.snapshot_object(
                extractor, 'snapshot_model_{.updater.epoch}.npz'),
            trigger=(args.epoch, 'epoch'))
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.PrintReport(
            ['iteration', 'epoch', 'elapsed_time', 'lr',
             'main/loss', 'validation/main/loss',
             'main/accuracy', 'validation/main/accuracy']
        ), trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
Example 15
0
def main():
    """Distributed, fault-tolerant ImageNet-style training of a ResNet
    using chainermn/echainer with etcd-based cluster coordination.

    NOTE(review): this function is corrupted.  A credentials scrubber
    replaced a large span of the body with ``*****`` (visible in the
    ``--etcd`` argument below), swallowing the code that created ``comm``,
    ``model``, ``extractor``, ``optimizer``, ``train_iter``, ``val_iter``,
    ``updater``, ``trainer``, ``device``, ``lr`` and ``retry``, as well as
    the enclosing retry loop that the trailing ``continue``/``break``
    statements belong to.  The remaining code is a fragment and cannot run
    as written — restore the missing section from the original source.
    """
    # Selectable backbone architectures; each entry names the chainercv
    # class, the layer whose output is the classification score, and extra
    # constructor kwargs ('fb' = Facebook-style ResNet variant).
    archs = {
        'resnet50': {'class': ResNet50, 'score_layer_name': 'fc6',
                     'kwargs': {'arch': 'fb'}},
        'resnet101': {'class': ResNet101, 'score_layer_name': 'fc6',
                      'kwargs': {'arch': 'fb'}},
        'resnet152': {'class': ResNet152, 'score_layer_name': 'fc6',
                      'kwargs': {'arch': 'fb'}}
    }
    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to root of the train dataset')
    parser.add_argument('val', help='Path to root of the validation dataset')
    parser.add_argument('--arch',
                        '-a', choices=archs.keys(), default='resnet50',
                        help='Convnet architecture')
    parser.add_argument('--loaderjob', type=int, default=4)
    parser.add_argument('--batchsize', type=int, default=32,
                        help='Batch size for each worker')
    parser.add_argument('--lr', type=float)
    parser.add_argument('--momentum', type=float, default=0.9)
    parser.add_argument('--weight_decay', type=float, default=0.0001)
    parser.add_argument('--out', type=str, default='result')
    parser.add_argument('--epoch', type=int, default=90)

    # Elastic-cluster parameters (echainer): process-count bounds, the gRPC
    # bind address, and the etcd endpoint used for membership/state.
    parser.add_argument('--min', type=int, required=True,
                        help='Minimum number of processes')
    parser.add_argument('--start', type=int, required=True,
                        help='Number of processes to start')
    parser.add_argument('--bind', '-p', type=str, required=True,
                        help='address to bind gRPC server')
    # NOTE(review): the next line is where the sanitizer corrupted the file —
    # the censored '--etcd' default runs straight into the '@make_shift'
    # decorator of the nested lr-schedule function.  Everything that was
    # between them (comm/model/optimizer/trainer setup and the retry loop
    # header) is missing.
    parser.add_argument('--etcd', '-c', type=str, default='etcd://*****:*****@make_shift('lr')
        def warmup_and_exponential_shift(trainer):
            # Linear warmup toward the base lr for 5 epochs (when lr > 0.1),
            # then step decay: x0.1 at epoch 30, x0.01 at 60, x0.001 at 80.
            epoch = trainer.updater.epoch_detail
            warmup_epoch = 5
            if epoch < warmup_epoch:
                if lr > 0.1:
                    warmup_rate = 0.1 / lr
                    rate = warmup_rate \
                           + (1 - warmup_rate) * epoch / warmup_epoch
                else:
                    rate = 1
            elif epoch < 30:
                rate = 1
            elif epoch < 60:
                rate = 0.1
            elif epoch < 80:
                rate = 0.01
            else:
                rate = 0.001
            return rate * lr
        trainer.extend(warmup_and_exponential_shift)

        # Validation runs once per epoch across all nodes.
        evaluator = chainermn.create_multi_node_evaluator(
            extensions.Evaluator(val_iter, model, device=device), comm)
        trainer.extend(evaluator, trigger=(1, 'epoch'))
        trainer.extend(comm.get_uninitializer(), trigger=(1, 'iteration'))

        log_interval = 0.1, 'epoch'
        print_interval = 0.5, 'epoch'
        plot_interval = 1, 'epoch'

        # Reporting extensions only on the first process of each node.
        if comm.intra_rank == 0:
            # TODO: lr is not properly controlled for accuracy
            trainer.extend(chainer.training.extensions.observe_lr(),
                           trigger=log_interval)
            # Lineage records training history for fault-tolerant resume.
            trainer.extend(echainer.extension.Lineage(comm, trigger=log_interval))
            trainer.extend(extensions.PrintReport(
                ['iteration', 'epoch', 'elapsed_time', 'lr',
                 'main/loss', 'validation/main/loss',
                 'main/accuracy', 'validation/main/accuracy'],
                log_report='Lineage'), trigger=print_interval)
            trainer.extend(extensions.ProgressBar(update_interval=10))

            if extensions.PlotReport.available():
                trainer.extend(
                    extensions.PlotReport(
                        ['main/loss', 'validation/main/loss'],
                        file_name='loss.png', trigger=plot_interval
                    ),
                    trigger=plot_interval
                )
                trainer.extend(
                    extensions.PlotReport(
                        ['main/accuracy', 'validation/main/accuracy'],
                        file_name='accuracy.png', trigger=plot_interval
                    ),
                trigger=plot_interval
                )

        # Optimizer includes model parameters and other params in optimizer
        comm.register_state('optimizer', optimizer)
        comm.register_state('iterator', train_iter)
        # When this process is restarting (or joined an existing cluster),
        # pull optimizer/iterator state from peers and resume mid-run.
        if retry or not comm.initial:
            (iteration, epoch) = comm.fetch_state('optimizer', optimizer)
            # train_iter.epoch = epoch
            comm.fetch_state('iterator', train_iter)
            updater.iteration = iteration

        optimizers = trainer.updater.get_all_optimizers()
        for name in optimizers.keys():
            optimizers[name].reset_prev_params()

        # NOTE(review): the 'continue'/'break' below imply an enclosing
        # retry loop whose header was lost to the corruption above.
        try:
            print('start trainer.run(), ', trainer.updater.iteration, trainer.updater.epoch)
            trainer.run()
            done = trainer._done
        except CommException as ce:
            # Communication failure: checkpoint state, resynchronize the
            # cluster, and retry the run from the saved point.
            print("Comm exception >>>>>>>>>>>", ce, updater.iteration, updater.epoch)
            comm.save_all_states(updater.iteration, updater.epoch)
            # Here comm will be ready to accept fetch state calls and once all
            # nodes got caught up it'll return and continue to run: TODO
            comm.sync_cluster(trainer.updater.get_all_optimizers())
            retry = True
            continue
        except ClusterUpdatedException as ce:
            # Cluster membership changed (node joined/left): same recovery
            # path as a communication failure.
            print("Cluster updated: >>>>>>>>>>>", ce)
            comm.save_all_states(updater.iteration, updater.epoch)
            comm.sync_cluster(trainer.updater.get_all_optimizers())
            retry = True
            continue
        except Exception as e:
            # Any other failure is fatal for this process.
            print("Unexpected >>>>>>>>>>>", e)
            break

    comm.leave()
Esempio n. 16
0
def main():
    """Run stratified k-fold training of an image classifier.

    Parses command-line options, loads a directory-parsed labeled image
    dataset, splits it into stratified folds, trains one model per fold
    with cosine-annealed momentum SGD, snapshots the best model by
    validation accuracy, and finally writes a confusion matrix per fold.
    """

    args = parser()
    # Timestamp used to name this run's output directory.
    now = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")

    # Output directory layout for this run.
    save_dir = Path('result') / now
    log_dir = save_dir / 'log'
    model_dir = save_dir / 'model'
    snap_dir = save_dir / 'snap'
    matrix_dir = save_dir / 'matrix'

    # Create the output directories up front.
    save_dir.mkdir(exist_ok=True, parents=True)
    log_dir.mkdir(exist_ok=True, parents=True)
    model_dir.mkdir(exist_ok=True, parents=True)
    snap_dir.mkdir(exist_ok=True, parents=True)
    matrix_dir.mkdir(exist_ok=True, parents=True)

    # Dataset root: one subdirectory per class.
    root = args.dataset

    dir_list = os.listdir(root)
    dir_list.sort()

    # A cached channel-mean file may live next to the class directories;
    # it is not a class, so drop it.
    if 'mean.npy' in dir_list:
        dir_list.remove('mean.npy')

    # Parse image file paths and labels from the directory structure.
    print('dataset loading ...')
    datasets = DirectoryParsingLabelDataset(root)
    print('finish!')

    # Number of classes = number of distinct labels found.
    class_num = len(set(datasets.labels))
    print('class number : {}'.format(class_num))

    # Number of cross-validation folds.
    k_fold = args.kfold
    print('k_fold : {}'.format(k_fold))

    X = np.array([image_paths for image_paths in datasets.img_paths])
    y = np.array([label for label in datasets.labels])

    # Stratified split keeps the class distribution similar across folds;
    # fixed random_state makes the folds reproducible.
    kfold = StratifiedKFold(n_splits=k_fold, shuffle=True, random_state=402).split(X, y)
    for k, (train_idx, val_idx) in enumerate(kfold):

        print("============= {} fold training =============".format(k + 1))
        X_train, y_train = X[train_idx], y[train_idx]
        X_val, y_val = X[val_idx], y[val_idx]
        # Build (image path, label) datasets for this fold.
        train = LabeledImageDataset([(x, y) for x, y in zip(X_train, y_train)])
        validation = LabeledImageDataset([(x, y) for x, y in zip(X_val, y_val)])

        train, validation, mean = get_dataset(train, validation, root, datasets, use_mean=False)

        # model setup
        model = StabilityClassifer(archs[args.arch](output=class_num))
        #model = ABNClassifier(archs[args.arch](output=class_num))
        lr = args.lr
        optimizer = chainer.optimizers.MomentumSGD(lr)
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0001))
        # using GPU
        if args.gpu >= 0:
            chainer.cuda.get_device_from_id(args.gpu).use()
            model.to_gpu()

        # setup iterators
        train_iter = chainer.iterators.MultithreadIterator(train, args.batchsize, n_threads=8)
        validation_iter = chainer.iterators.MultithreadIterator(validation, args.batchsize,
                                                                repeat=False, shuffle=False, n_threads=8)
        # setup updater and trainer
        updater = training.StandardUpdater(
            train_iter, optimizer, device=args.gpu)
        trainer = training.Trainer(
            updater, (args.epoch, 'epoch'), out=save_dir)

        # set extensions
        log_trigger = (1, 'epoch')
        target = 'lr'
        # Cosine annealing of the learning rate over the whole run.
        trainer.extend(CosineShift(target, args.epoch, 1),
                       trigger=(1, "epoch"))

        trainer.extend(extensions.Evaluator(validation_iter, model, device=args.gpu),
                       trigger=log_trigger)

        # Keep only the snapshot with the best validation accuracy.
        snap_name = '{}-{}_fold_model.npz'.format(k_fold, k+1)
        trainer.extend(extensions.snapshot_object(model, str(snap_name)),
                       trigger=chainer.training.triggers.MaxValueTrigger(
                       key='validation/main/accuracy', trigger=(1, 'epoch')))

        log_name = '{}-{}_fold_log.json'.format(k_fold, k+1)
        trainer.extend(extensions.LogReport(
            log_name=str(log_name), trigger=log_trigger))

        trainer.extend(extensions.observe_lr(), trigger=log_trigger)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration',
            'main/loss','main/lossL2', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy',
            'elapsed_time', 'lr'
        ]), trigger=(1, 'epoch'))

        trainer.extend(extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                             'epoch',file_name='loss{}.png'.format(k+1)))
        trainer.extend(extensions.PlotReport(['main/accuracy', 'validation/main/accuracy'],
                                             'epoch', file_name='accuracy{}.png'.format(k+1)))
        trainer.extend(extensions.ProgressBar(update_interval=10))
        #if args.resume:
            #chainer.serializers.load_npz(args.resume, trainer)

        trainer.run()

        # Move this fold's best snapshot and log into their directories.
        snap_file = save_dir / snap_name
        shutil.move(str(snap_file), str(snap_dir))

        log_file = save_dir / log_name
        shutil.move(str(log_file), str(log_dir))

        # Save the final (last-epoch) model weights as well.
        save_model = model_dir / "{}_{}-{}_fold.npz".format(now, k_fold, k + 1)
        chainer.serializers.save_npz(str(save_model), model)

        print("============= {} fold Evaluation =============".format(k + 1))
        # Derive human-readable class names from the dataset directories.
        dnames = glob.glob('{}/*'.format(root))
        labels_list = []
        for d in dnames:
            p_dir = Path(d)
            labels_list.append(p_dir.name)
        if 'mean.npy' in labels_list:
            labels_list.remove('mean.npy')
        confusion_matrix_cocoa(validation, args.gpu, 8,
                               model, matrix_dir, k, labels_list)
Esempio n. 17
0
def load_dataset(path):
    """Load a directory-parsed labeled image dataset and split it 90/10.

    Images under *path* are parsed into a labeled dataset, wrapped in the
    preprocessing dataset, then split at random (fixed seed for
    reproducibility) into a 90% training part and a 10% test part.
    """
    preprocessed = PreprocessDataset(DirectoryParsingLabelDataset(path))
    n_train = int(len(preprocessed) * 0.9)
    return split_dataset_random(preprocessed, n_train, seed=0)