def test_iterator_repeat(self):
    dataset = [1, 2, 3, 4, 5, 6]
    it = iterators.MultithreadIterator(dataset, 2, **self.options)
    for i in range(3):
        self.assertEqual(it.epoch, i)
        self.assertAlmostEqual(it.epoch_detail, i + 0 / 6)
        if i == 0:
            self.assertIsNone(it.previous_epoch_detail)
        else:
            self.assertAlmostEqual(it.previous_epoch_detail, i - 2 / 6)
        batch1 = it.next()
        self.assertEqual(len(batch1), 2)
        self.assertIsInstance(batch1, list)
        self.assertFalse(it.is_new_epoch)
        self.assertAlmostEqual(it.epoch_detail, i + 2 / 6)
        self.assertAlmostEqual(it.previous_epoch_detail, i + 0 / 6)
        batch2 = it.next()
        self.assertEqual(len(batch2), 2)
        self.assertIsInstance(batch2, list)
        self.assertFalse(it.is_new_epoch)
        self.assertAlmostEqual(it.epoch_detail, i + 4 / 6)
        self.assertAlmostEqual(it.previous_epoch_detail, i + 2 / 6)
        batch3 = it.next()
        self.assertEqual(len(batch3), 2)
        self.assertIsInstance(batch3, list)
        self.assertTrue(it.is_new_epoch)
        self.assertEqual(sorted(batch1 + batch2 + batch3), dataset)
        self.assertAlmostEqual(it.epoch_detail, i + 6 / 6)
        self.assertAlmostEqual(it.previous_epoch_detail, i + 4 / 6)
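These scraped test methods reference self.options, numpy, six, and iterators without their surrounding context. A minimal fixture they could hang off of (an assumption; the class header and imports were lost in extraction) might look like:

# Hypothetical fixture: the real test class and its parameterization
# are not part of the scraped snippets.
import unittest

import numpy
import six
from chainer import iterators


class TestMultithreadIterator(unittest.TestCase):

    def setUp(self):
        # forwarded as iterators.MultithreadIterator(dataset, 2, **self.options)
        self.options = {'n_threads': 2}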
    def test_iterator_list_type(self):
        dataset = [[i, numpy.zeros((10, )) + i] for i in range(6)]
        it = iterators.MultithreadIterator(dataset, 2, **self.options)
        for i in range(3):
            self.assertEqual(it.epoch, i)
            self.assertAlmostEqual(it.epoch_detail, i)
            if i == 0:
                self.assertIsNone(it.previous_epoch_detail)
            else:
                self.assertAlmostEqual(it.previous_epoch_detail, i - 2 / 6)
            batches = {}
            for j in range(3):
                batch = it.next()
                self.assertEqual(len(batch), 2)
                if j != 2:
                    self.assertFalse(it.is_new_epoch)
                else:
                    self.assertTrue(it.is_new_epoch)
                self.assertAlmostEqual(it.epoch_detail,
                                       (3 * i + j + 1) * 2 / 6)
                self.assertAlmostEqual(it.previous_epoch_detail,
                                       (3 * i + j) * 2 / 6)
                for x in batch:
                    self.assertIsInstance(x, list)
                    self.assertIsInstance(x[1], numpy.ndarray)
                    batches[x[0]] = x[1]

            self.assertEqual(len(batches), len(dataset))
            for k, v in six.iteritems(batches):
                numpy.testing.assert_allclose(dataset[k][1], v)
Example No. 3
    def test_invalid_order_sampler(self):
        dataset = [1, 2, 3, 4, 5, 6]

        with self.assertRaises(ValueError):
            it = iterators.MultithreadIterator(
                dataset, 6, order_sampler=InvalidOrderSampler())
            it.next()
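The InvalidOrderSampler helper is not shown in the scrape. Chainer calls an order_sampler with the current order and a position and expects a new order of the same length back; a sketch that would trigger the ValueError above (an assumption modeled on the pattern of Chainer's own tests, not the helper's actual definition):

import numpy


class InvalidOrderSampler(object):
    """First call returns a valid order; each later call grows it by one,
    so the iterator's length check fails at the first epoch boundary."""

    def __init__(self):
        self.n_call = 0

    def __call__(self, current_order, current_position):
        order = numpy.arange(len(current_order) + self.n_call)
        self.n_call += 1
        return order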
Example No. 4
def main():
    args = parse_args()
    cfg.merge_from_file(args.config)
    cfg.freeze()

    model = setup_model(cfg)
    load_pretrained_model(cfg, args.config, model, args.pretrained_model)

    dataset = setup_dataset(cfg, 'eval')
    iterator = iterators.MultithreadIterator(dataset,
                                             args.batchsize,
                                             repeat=False,
                                             shuffle=False)

    model.use_preset('evaluate')
    if args.gpu >= 0:
        model.to_gpu(args.gpu)

    in_values, out_values, rest_values = apply_to_iterator(model.predict,
                                                           iterator,
                                                           hook=ProgressHook(
                                                               len(dataset)))
    # delete unused iterators explicitly
    del in_values

    if cfg.dataset.eval == 'COCO':
        eval_coco(out_values, rest_values)
    elif cfg.dataset.eval == 'VOC':
        eval_voc(out_values, rest_values)
    else:
        raise ValueError('unknown cfg.dataset.eval: {}'.format(cfg.dataset.eval))
Example No. 5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', choices=('sbd', 'coco'))
    parser.add_argument('--model', choices=sorted(models.keys()))
    parser.add_argument('--pretrained-model')
    parser.add_argument('--batchsize', type=int)
    args = parser.parse_args()

    comm = chainermn.create_communicator('pure_nccl')
    device = comm.intra_rank

    dataset, label_names, eval_, model, batchsize = setup(
        args.dataset, args.model, args.pretrained_model, args.batchsize)

    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    if comm.rank != 0:
        apply_to_iterator(model.predict, None, comm=comm)
        return

    iterator = iterators.MultithreadIterator(
        dataset, batchsize * comm.size, repeat=False, shuffle=False)

    in_values, out_values, rest_values = apply_to_iterator(
        model.predict, iterator, hook=ProgressHook(len(dataset)), comm=comm)
    # delete unused iterators explicitly
    del in_values

    eval_(out_values, rest_values)
Example No. 6
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', choices=('voc', 'coco'))
    parser.add_argument('--model', choices=sorted(models.keys()))
    parser.add_argument('--pretrained-model')
    parser.add_argument('--batchsize', type=int)
    parser.add_argument('--gpu', type=int, default=-1)
    args = parser.parse_args()

    dataset, eval_, model, batchsize = setup(args.dataset, args.model,
                                             args.pretrained_model,
                                             args.batchsize)

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    model.use_preset('evaluate')

    iterator = iterators.MultithreadIterator(dataset,
                                             batchsize,
                                             repeat=False,
                                             shuffle=False)

    in_values, out_values, rest_values = apply_to_iterator(model.predict,
                                                           iterator,
                                                           hook=ProgressHook(
                                                               len(dataset)))
    # delete unused iterators explicitly
    del in_values

    eval_(out_values, rest_values)
Example No. 7
def test_supported_reset_middle(self):
    dataset = [1, 2, 3, 4, 5]
    it = iterators.MultithreadIterator(
        dataset, 2, repeat=False, **self.options)
    it.next()
    it.reset()
Example No. 8
def test_supported_reset_repeat(self):
    dataset = [1, 2, 3, 4]
    it = iterators.MultithreadIterator(
        dataset, 2, repeat=True, **self.options)
    it.next()
    it.next()
    it.reset()
Example No. 9
def test_supported_reset_finalized(self):
    dataset = [1, 2, 3, 4]
    it = iterators.MultithreadIterator(
        dataset, 2, repeat=False, **self.options)
    it.next()
    it.next()
    it.finalize()
    it.reset()
Example No. 10
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--model', choices=('resnet50', 'resnet101'))
    parser.add_argument(
        '--mean', choices=('chainercv', 'detectron'), default='chainercv')
    parser.add_argument('--batchsize', type=int, default=1)
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--pretrained-model')
    group.add_argument('--snapshot')
    args = parser.parse_args()

    if args.model == 'resnet50':
        model = FasterRCNNFPNResNet50(n_fg_class=len(coco_bbox_label_names),
                                      mean=args.mean)
    elif args.model == 'resnet101':
        model = FasterRCNNFPNResNet101(n_fg_class=len(coco_bbox_label_names),
                                       mean=args.mean)

    if args.pretrained_model:
        chainer.serializers.load_npz(args.pretrained_model, model)
    elif args.snapshot:
        chainer.serializers.load_npz(
            args.snapshot, model, path='updater/model:main/model/')

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    model.use_preset('evaluate')

    dataset = COCOBboxDataset(
        split='minival',
        use_crowded=True,
        return_area=True,
        return_crowded=True)
    iterator = iterators.MultithreadIterator(
        dataset, args.batchsize, repeat=False, shuffle=False)

    in_values, out_values, rest_values = apply_to_iterator(
        model.predict, iterator, hook=ProgressHook(len(dataset)))
    # delete unused iterators explicitly
    del in_values

    pred_bboxes, pred_labels, pred_scores = out_values
    gt_bboxes, gt_labels, gt_area, gt_crowded = rest_values

    result = eval_detection_coco(
        pred_bboxes, pred_labels, pred_scores,
        gt_bboxes, gt_labels, gt_area, gt_crowded)

    print()
    for area in ('all', 'large', 'medium', 'small'):
        print('mmAP ({}):'.format(area),
              result['map/iou=0.50:0.95/area={}/max_dets=100'.format(area)])
Example No. 11
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset',
                        choices=('cityscapes', 'ade20k', 'camvid', 'voc'))
    parser.add_argument('--model',
                        choices=('pspnet_resnet101', 'segnet',
                                 'deeplab_v3plus_xception65'))
    parser.add_argument('--pretrained-model')
    parser.add_argument('--input-size', type=int, default=None)
    args = parser.parse_args()

    comm = chainermn.create_communicator('pure_nccl')
    device = comm.intra_rank

    if args.input_size is None:
        input_size = None
    else:
        input_size = (args.input_size, args.input_size)

    dataset, label_names, model = get_dataset_and_model(
        args.dataset, args.model, args.pretrained_model, input_size)

    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    if comm.rank != 0:
        apply_to_iterator(model.predict, None, comm=comm)
        return

    it = iterators.MultithreadIterator(dataset,
                                       comm.size,
                                       repeat=False,
                                       shuffle=False)

    in_values, out_values, rest_values = apply_to_iterator(model.predict,
                                                           it,
                                                           hook=ProgressHook(
                                                               len(dataset)),
                                                           comm=comm)
    # Delete an iterator of images to save memory usage.
    del in_values
    pred_labels, = out_values
    gt_labels, = rest_values

    result = eval_semantic_segmentation(pred_labels, gt_labels)

    for iu, label_name in zip(result['iou'], label_names):
        print('{:>23} : {:.4f}'.format(label_name, iu))
    print('=' * 34)
    print('{:>23} : {:.4f}'.format('mean IoU', result['miou']))
    print('{:>23} : {:.4f}'.format('Class average accuracy',
                                   result['mean_class_accuracy']))
    print('{:>23} : {:.4f}'.format('Global average accuracy',
                                   result['pixel_accuracy']))
Example No. 12
    def test_no_same_indices_order_sampler(self):
        dataset = [1, 2, 3, 4, 5, 6]
        batchsize = 5

        it = iterators.MultithreadIterator(
            dataset,
            batchsize,
            order_sampler=NoSameIndicesOrderSampler(batchsize))
        for _ in range(5):
            batch = it.next()
            self.assertEqual(len(numpy.unique(batch)), batchsize)
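The NoSameIndicesOrderSampler helper is not included in the scrape. A sketch that would satisfy the assertion above (an assumption about the helper, not its actual definition; it assumes the sampler is invoked with the start position of the batch that wraps, and that a batch spans at most one epoch boundary):

import numpy


class NoSameIndicesOrderSampler(object):
    """Resample the next epoch's permutation until its head does not
    repeat the indices left over from the current order, so a batch
    spanning the epoch boundary still holds batchsize distinct items."""

    def __init__(self, batchsize):
        self.batchsize = batchsize

    def __call__(self, current_order, current_position):
        remaining = set(current_order[current_position:].tolist())
        need = self.batchsize - len(remaining)
        while True:
            order = numpy.random.permutation(len(current_order))
            # need <= 0 covers the initial call, where there is no boundary
            # batch to protect and any permutation is acceptable.
            if need <= 0 or not remaining & set(order[:need].tolist()):
                return order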
Example No. 13
    def test_iterator_not_repeat(self):
        dataset = [1, 2, 3, 4, 5]
        it = iterators.MultithreadIterator(dataset,
                                           2,
                                           repeat=False,
                                           **self.options)

        batches = sum([it.next() for _ in range(3)], [])
        self.assertEqual(sorted(batches), dataset)
        for _ in range(2):
            self.assertRaises(StopIteration, it.next)
Example No. 14
    def test_iterator_serialize(self):
        dataset = [1, 2, 3, 4, 5, 6]
        it = iterators.MultithreadIterator(dataset, 2, **self.options)

        self.assertEqual(it.epoch, 0)
        self.assertAlmostEqual(it.epoch_detail, 0 / 6)
        self.assertIsNone(it.previous_epoch_detail)
        batch1 = it.next()
        self.assertEqual(len(batch1), 2)
        self.assertIsInstance(batch1, list)
        self.assertFalse(it.is_new_epoch)
        self.assertAlmostEqual(it.epoch_detail, 2 / 6)
        self.assertAlmostEqual(it.previous_epoch_detail, 0 / 6)
        batch2 = it.next()
        self.assertEqual(len(batch2), 2)
        self.assertIsInstance(batch2, list)
        self.assertFalse(it.is_new_epoch)
        self.assertAlmostEqual(it.epoch_detail, 4 / 6)
        self.assertAlmostEqual(it.previous_epoch_detail, 2 / 6)

        target = dict()
        it.serialize(DummySerializer(target))

        it = iterators.MultithreadIterator(dataset, 2, **self.options)
        it.serialize(DummyDeserializer(target))
        self.assertFalse(it.is_new_epoch)
        self.assertAlmostEqual(it.epoch_detail, 4 / 6)
        self.assertAlmostEqual(it.previous_epoch_detail, 2 / 6)

        batch3 = it.next()
        self.assertEqual(len(batch3), 2)
        self.assertIsInstance(batch3, list)
        self.assertTrue(it.is_new_epoch)
        self.assertEqual(sorted(batch1 + batch2 + batch3), dataset)
        self.assertAlmostEqual(it.epoch_detail, 6 / 6)
        self.assertAlmostEqual(it.previous_epoch_detail, 4 / 6)
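The DummySerializer and DummyDeserializer helpers are not part of the scrape. A minimal pair that round-trips the iterator state through a plain dict (a sketch modeled on Chainer's serializer protocol; the originals may differ):

import numpy
from chainer import serializer


class DummySerializer(serializer.Serializer):
    """Record every (key, value) pair into a plain dict."""

    def __init__(self, target):
        self.target = target

    def __getitem__(self, key):
        raise NotImplementedError  # no nested scopes needed for the iterator

    def __call__(self, key, value):
        self.target[key] = value
        return value


class DummyDeserializer(serializer.Deserializer):
    """Read the recorded values back out of the same dict."""

    def __init__(self, target):
        self.target = target

    def __getitem__(self, key):
        raise NotImplementedError

    def __call__(self, key, value):
        if value is None:
            return self.target[key]
        if isinstance(value, numpy.ndarray):
            numpy.copyto(value, self.target[key])
            return value
        return type(value)(numpy.asarray(self.target[key]))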
Example No. 15
def main():
    args = parse_args()
    cfg.merge_from_file(args.config)
    cfg.freeze()

    comm = chainermn.create_communicator('pure_nccl')
    device = comm.intra_rank

    model = setup_model(cfg)
    load_pretrained_model(cfg, args.config, model, args.pretrained_model)
    dataset = setup_dataset(cfg, 'eval')

    model.use_preset('evaluate')
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    if comm.rank != 0:
        apply_to_iterator(model.predict, None, comm=comm)
        return

    iterator = iterators.MultithreadIterator(dataset,
                                             args.batchsize * comm.size,
                                             repeat=False,
                                             shuffle=False)

    in_values, out_values, rest_values = apply_to_iterator(model.predict,
                                                           iterator,
                                                           hook=ProgressHook(
                                                               len(dataset)),
                                                           comm=comm)
    # delete unused iterators explicitly
    del in_values

    if cfg.dataset.eval == 'COCO':
        eval_coco(out_values, rest_values)
    elif cfg.dataset.eval == 'VOC':
        eval_voc(out_values, rest_values)
    else:
        raise ValueError('unknown cfg.dataset.eval: {}'.format(cfg.dataset.eval))
Example No. 16
    def test_iterator_not_repeat_not_even(self):
        dataset = [1, 2, 3, 4, 5]
        it = iterators.MultithreadIterator(dataset,
                                           2,
                                           repeat=False,
                                           **self.options)

        self.assertAlmostEqual(it.epoch_detail, 0 / 5)
        self.assertIsNone(it.previous_epoch_detail)
        batch1 = it.next()
        self.assertAlmostEqual(it.epoch_detail, 2 / 5)
        self.assertAlmostEqual(it.previous_epoch_detail, 0 / 5)
        batch2 = it.next()
        self.assertAlmostEqual(it.epoch_detail, 4 / 5)
        self.assertAlmostEqual(it.previous_epoch_detail, 2 / 5)
        batch3 = it.next()
        self.assertAlmostEqual(it.epoch_detail, 5 / 5)
        self.assertAlmostEqual(it.previous_epoch_detail, 4 / 5)
        self.assertRaises(StopIteration, it.next)

        self.assertEqual(len(batch3), 1)
        self.assertEqual(sorted(batch1 + batch2 + batch3), dataset)
Example No. 17
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset',
                        choices=('cityscapes', 'ade20k', 'camvid', 'voc'))
    parser.add_argument('--model', choices=sorted(models.keys()))
    parser.add_argument('--pretrained-model')
    parser.add_argument('--batchsize', type=int)
    parser.add_argument('--input-size', type=int, default=None)
    args = parser.parse_args()

    comm = chainermn.create_communicator('pure_nccl')
    device = comm.intra_rank

    dataset, eval_, model, batchsize = setup(args.dataset, args.model,
                                             args.pretrained_model,
                                             args.batchsize, args.input_size)

    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    if comm.rank != 0:
        apply_to_iterator(model.predict, None, comm=comm)
        return

    it = iterators.MultithreadIterator(dataset,
                                       batchsize * comm.size,
                                       repeat=False,
                                       shuffle=False)

    in_values, out_values, rest_values = apply_to_iterator(model.predict,
                                                           it,
                                                           hook=ProgressHook(
                                                               len(dataset)),
                                                           comm=comm)
    # Delete an iterator of images to save memory usage.
    del in_values

    eval_(out_values, rest_values)
Example No. 18
    valid_dataset = SceneDatasetCV(data, args.input_len, args.offset_len,
                                   args.pred_len, args.width, args.height,
                                   data_dir, valid_split, -1, False,
                                   "scale" in args.model, args.ego_type)
    logger.info(valid_dataset.X.shape)

    # X: input, Y: output, poses, egomotions
    data_idxs = [0, 1, 2, 7]
    # data_idxs is assigned unconditionally above, so this guard can never
    # fire in this snippet (it presumably screened model types upstream).
    if data_idxs is None:
        logger.info("Invalid argument: model={}".format(args.model))
        exit(1)

    model = get_model(args)
    # Note: this MultithreadIterator / Evaluator_Direct pair is immediately
    # superseded by the MultiprocessIterator / Evaluator created below.
    valid_iterator = iterators.MultithreadIterator(valid_dataset,
                                                   args.batch_size,
                                                   repeat=False,
                                                   shuffle=False,
                                                   n_threads=args.nb_jobs)
    valid_eval = Evaluator_Direct("valid", args)

    prediction_dict = {"arguments": vars(args), "predictions": {}}
    valid_iterator = iterators.MultiprocessIterator(valid_dataset,
                                                    args.batch_size,
                                                    repeat=False,
                                                    shuffle=False,
                                                    n_processes=args.nb_jobs)
    valid_eval = Evaluator("valid", args)

    logger.info("Evaluation...")
    chainer.config.train = False
    chainer.config.enable_backprop = False
Example No. 19
def main():
    # command line argument parsing
    parser = argparse.ArgumentParser(
        description='Multi-Perceptron classifier/regressor')
    parser.add_argument('train', help='Path to csv file')
    parser.add_argument('--root',
                        '-R',
                        default="betti",
                        help='Path to image files')
    parser.add_argument('--val',
                        help='Path to validation csv file',
                        required=True)
    parser.add_argument('--regress',
                        '-r',
                        action='store_true',
                        help='set for regression, otherwise classification')
    parser.add_argument('--time_series',
                        '-ts',
                        action='store_true',
                        help='set for time series data')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=10,
                        help='Number of samples in each mini-batch')
    parser.add_argument('--layer',
                        '-l',
                        type=str,
                        choices=['res5', 'pool5'],
                        default='pool5',
                        help='output layer of the pretrained ResNet')
    parser.add_argument('--fch',
                        type=int,
                        nargs="*",
                        default=[],
                        help='numbers of channels for the last fc layers')
    parser.add_argument('--cols',
                        '-c',
                        type=int,
                        nargs="*",
                        default=[1],
                        help='column indices in csv of target variables')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--snapshot',
                        '-s',
                        type=int,
                        default=100,
                        help='snapshot interval')
    parser.add_argument('--initmodel',
                        '-i',
                        help='Initialize the model from given file')
    parser.add_argument('--random',
                        '-rt',
                        type=int,
                        default=1,
                        help='random translation')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--loaderjob',
                        '-j',
                        type=int,
                        default=3,
                        help='Number of parallel data loading processes')
    parser.add_argument('--outdir',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--optimizer',
                        '-op',
                        choices=optim.keys(),
                        default='Adam',
                        help='optimizer')
    parser.add_argument('--resume',
                        type=str,
                        default=None,
                        help='Resume the training from snapshot')
    parser.add_argument('--predict',
                        '-p',
                        action='store_true',
                        help='prediction with a specified model')
    parser.add_argument('--tuning_rate',
                        '-tr',
                        type=float,
                        default=0.1,
                        help='learning rate for pretrained layers')
    parser.add_argument('--dropout',
                        '-dr',
                        type=float,
                        default=0,
                        help='dropout ratio for the FC layers')
    parser.add_argument('--cw',
                        '-cw',
                        type=int,
                        default=128,
                        help='crop image width')
    parser.add_argument('--ch',
                        '-ch',
                        type=int,
                        default=128,
                        help='crop image height')
    parser.add_argument('--weight_decay',
                        '-w',
                        type=float,
                        default=1e-6,
                        help='weight decay for regularization')
    parser.add_argument('--wd_norm',
                        '-wn',
                        choices=['none', 'l1', 'l2'],
                        default='l2',
                        help='norm of weight decay for regularization')
    parser.add_argument('--dtype',
                        '-dt',
                        choices=dtypes.keys(),
                        default='fp32',
                        help='floating point precision')
    args = parser.parse_args()

    args.outdir = os.path.join(args.outdir, dt.now().strftime('%m%d_%H%M'))
    # Enable autotuner of cuDNN
    chainer.config.autotune = True
    chainer.config.dtype = dtypes[args.dtype]
    chainer.print_runtime_info()

    # read csv file
    train = Dataset(args.root,
                    args.train,
                    cw=args.cw,
                    ch=args.ch,
                    random=args.random,
                    regression=args.regress,
                    time_series=args.time_series,
                    cols=args.cols)
    test = Dataset(args.root,
                   args.val,
                   cw=args.cw,
                   ch=args.ch,
                   regression=args.regress,
                   time_series=args.time_series,
                   cols=args.cols)

    ##
    # args.gpu defaults to None; testing "not args.gpu" would also trip on
    # an explicit "-g 0", so compare against None.
    if args.gpu is None:
        if chainer.cuda.available:
            args.gpu = 0
        else:
            args.gpu = -1
    print(args)
    save_args(args, args.outdir)

    if args.regress:
        accfun = F.mean_absolute_error
        lossfun = F.mean_squared_error
        args.chs = len(args.cols)
    else:
        accfun = F.accuracy
        lossfun = F.softmax_cross_entropy
        args.chs = max(train.chs, test.chs)
        if len(args.cols) > 1:
            print("\n\nClassification only works with a single target.\n\n")
            exit()

    # Set up a neural network to train
    model = L.Classifier(Resnet(args), lossfun=lossfun, accfun=accfun)

    # Set up an optimizer
    optimizer = optim[args.optimizer]()
    optimizer.setup(model)
    if args.weight_decay > 0:
        if args.wd_norm == 'l2':
            optimizer.add_hook(chainer.optimizer.WeightDecay(
                args.weight_decay))
        elif args.wd_norm == 'l1':
            optimizer.add_hook(chainer.optimizer_hooks.Lasso(
                args.weight_decay))
    # slow update for pretrained layers
    if args.optimizer in ['Adam']:
        for func_name in model.predictor.base._children:
            for param in model.predictor.base[func_name].params():
                param.update_rule.hyperparam.alpha *= args.tuning_rate

    if args.initmodel:
        print('Load model from: ', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    # select numpy or cupy
    xp = chainer.cuda.cupy if args.gpu >= 0 else np

    #    train_iter = iterators.SerialIterator(train, args.batchsize, shuffle=True)
    #    test_iter = iterators.SerialIterator(test, args.batchsize, repeat=False, shuffle=False)
    train_iter = iterators.MultithreadIterator(train,
                                               args.batchsize,
                                               shuffle=True,
                                               n_threads=args.loaderjob)
    test_iter = iterators.MultithreadIterator(test,
                                              args.batchsize,
                                              repeat=False,
                                              shuffle=False,
                                              n_threads=args.loaderjob)

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.outdir)

    frequency = args.epoch if args.snapshot == -1 else max(1, args.snapshot)
    log_interval = 1, 'epoch'
    val_interval = 20, 'epoch'  # frequency/10, 'epoch'

    #    trainer.extend(extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'), trigger=(frequency, 'epoch'))
    trainer.extend(extensions.snapshot_object(model,
                                              'model_epoch_{.updater.epoch}'),
                   trigger=(frequency, 'epoch'))
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu),
                   trigger=val_interval)

    if args.optimizer in ['Momentum', 'AdaGrad', 'RMSprop']:
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.ExponentialShift('lr', 0.5),
                       trigger=(args.epoch / 5, 'epoch'))
    elif args.optimizer in ['Adam']:
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.ExponentialShift("alpha",
                                                   0.5,
                                                   optimizer=optimizer),
                       trigger=(args.epoch / 5, 'epoch'))

    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch',
                                  file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch',
                file_name='accuracy.png'))

    trainer.extend(extensions.PrintReport([
        'epoch', 'main/loss', 'main/accuracy', 'validation/main/loss',
        'validation/main/accuracy', 'elapsed_time', 'lr'
    ]),
                   trigger=log_interval)

    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # ChainerUI
    #trainer.extend(CommandsExtension())
    # (this LogReport duplicates the one registered above; Chainer keeps it
    # under an auto-suffixed name)
    trainer.extend(extensions.LogReport(trigger=log_interval))

    if not args.predict:
        trainer.run()

    ## prediction
    print("predicting: {} entries...".format(len(test)))
    test_iter = iterators.SerialIterator(test,
                                         args.batchsize,
                                         repeat=False,
                                         shuffle=False)
    converter = concat_examples
    idx = 0
    with open(os.path.join(args.outdir, 'result.txt'), 'w') as output:
        for batch in test_iter:
            x, t = converter(batch, device=args.gpu)
            with chainer.using_config('train', False):
                with chainer.function.no_backprop_mode():
                    if args.regress:
                        y = model.predictor(x).data
                        if args.gpu > -1:
                            y = xp.asnumpy(y)
                            t = xp.asnumpy(t)
                        y = y * test.std + test.mean
                        t = t * test.std + test.mean
                    else:
                        y = F.softmax(model.predictor(x)).data
                        if args.gpu > -1:
                            y = xp.asnumpy(y)
                            t = xp.asnumpy(t)
            for i in range(y.shape[0]):
                output.write(os.path.basename(test.ids[idx]))
                if (len(t.shape) > 1):
                    for j in range(t.shape[1]):
                        output.write(",{}".format(t[i, j]))
                        output.write(",{}".format(y[i, j]))
                else:
                    output.write(",{}".format(t[i]))
                    output.write(",{}".format(np.argmax(y[i, :])))
                    for yy in y[i]:
                        output.write(",{0:1.5f}".format(yy))
                output.write("\n")
                idx += 1
Example No. 20
def train(args):
    """ Training model with chainer backend.
    This function is called from eend/bin/train.py with
    parsed command-line arguments.
    """
    np.random.seed(args.seed)
    #   cp.random.seed(args.seed)
    os.environ['CHAINER_SEED'] = str(args.seed)
    chainer.global_config.cudnn_deterministic = True

    train_set = KaldiDiarizationDataset(
        args.train_data_dir,
        chunk_size=args.num_frames,
        context_size=args.context_size,
        input_transform=args.input_transform,
        frame_size=args.frame_size,
        frame_shift=args.frame_shift,
        subsampling=args.subsampling,
        rate=args.sampling_rate,
        use_last_samples=True,
        label_delay=args.label_delay,
        n_speakers=args.num_speakers,
    )
    dev_set = KaldiDiarizationDataset(
        args.valid_data_dir,
        chunk_size=args.num_frames,
        context_size=args.context_size,
        input_transform=args.input_transform,
        frame_size=args.frame_size,
        frame_shift=args.frame_shift,
        subsampling=args.subsampling,
        rate=args.sampling_rate,
        use_last_samples=True,
        label_delay=args.label_delay,
        n_speakers=args.num_speakers,
    )

    # Prepare model
    n, Y, T = train_set.get_example(0)

    if args.model_type == 'Transformer':
        model = TransformerDiarization(
            in_size=Y.shape[1],
            n_units=args.hidden_size,
            n_heads=args.transformer_encoder_n_heads,
            n_layers=args.transformer_encoder_n_layers,
            dropout=args.transformer_encoder_dropout,
            alpha=args.transformer_alpha)
    else:
        raise ValueError('Possible model_type is "Transformer"')

    if args.gpu >= 0:
        gpuid = use_single_gpu()
        print('GPU device {} is used'.format(gpuid))
        model.to_gpu()
    else:
        gpuid = -1
    print('Prepared model')
    #gpuid = 2

    # Setup optimizer
    if args.optimizer == 'adam':
        optimizer = optimizers.Adam(alpha=args.lr)
    elif args.optimizer == 'sgd':
        optimizer = optimizers.SGD(lr=args.lr)
    elif args.optimizer == 'noam':
        optimizer = optimizers.Adam(alpha=0, beta1=0.9, beta2=0.98, eps=1e-9)
    else:
        raise ValueError(args.optimizer)

    optimizer.setup(model)
    if args.gradclip > 0:
        optimizer.add_hook(
            chainer.optimizer_hooks.GradientClipping(args.gradclip))

    # Init/Resume
    if args.initmodel:
        serializers.load_npz(args.initmodel, model)
        print('Load model from', args.initmodel)

    print("Done")

    # train_iter = iterators.MultiprocessIterator(
    #         train_set,
    #         batch_size=args.batchsize,
    #         repeat=True, shuffle=True,
    #         # shared_mem=64000000,
    #         shared_mem=None,
    #         n_processes=4,
    #         n_prefetch=2,
    #         maxtasksperchild=2)

    # dev_iter = iterators.MultiprocessIterator(
    #         dev_set,
    #         batch_size=args.batchsize,
    #         repeat=False, shuffle=False,
    #         # shared_mem=64000000,
    #         shared_mem=None,
    #         n_processes=4,
    #         n_prefetch=2,
    #         maxtasksperchild=2)

    train_iter = iterators.MultithreadIterator(train_set,
                                               batch_size=args.batchsize,
                                               repeat=True,
                                               shuffle=True,
                                               n_threads=10)
    dev_iter = iterators.MultithreadIterator(dev_set,
                                             batch_size=args.batchsize,
                                             repeat=False,
                                             shuffle=False,
                                             n_threads=10)

    # Peek at one batch to sanity-check shapes (note: this consumes the
    # first prefetched batch before training starts).
    batches = train_iter.next()
    for (n, Y, T) in batches:
        print(n, Y.shape, T.shape)

    if args.gradient_accumulation_steps > 1:
        updater = GradientAccumulationUpdater(train_iter,
                                              optimizer,
                                              converter=_convert,
                                              device=gpuid)
    else:
        updater = training.StandardUpdater(train_iter,
                                           optimizer,
                                           converter=_convert,
                                           device=gpuid)

    trainer = training.Trainer(updater, (args.max_epochs, 'epoch'),
                               out=os.path.join(args.model_save_dir))

    evaluator = extensions.Evaluator(dev_iter,
                                     model,
                                     converter=_convert,
                                     device=gpuid)
    #    evaluator_1 = extensions.Evaluator(
    #            dev_iter, model, converter=_convert, device=gpuid)
    trainer.extend(evaluator)
    #    trainer.extend(evaluator_1)   # to test knowing the speaker labels before hand

    if args.optimizer == 'noam':
        trainer.extend(NoamScheduler(args.hidden_size,
                                     warmup_steps=args.noam_warmup_steps,
                                     scale=args.noam_scale),
                       trigger=(1, 'iteration'))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # MICRO AVERAGE
    metrics = [('diarization_error', 'speaker_scored', 'DER'),
               ('speech_miss', 'speech_scored', 'SAD_MR'),
               ('speech_falarm', 'speech_scored', 'SAD_FR'),
               ('speaker_miss', 'speaker_scored', 'MI'),
               ('speaker_falarm', 'speaker_scored', 'FA'),
               ('speaker_error', 'speaker_scored', 'CF'),
               ('correct', 'frames', 'accuracy')]
    for num, den, name in metrics:
        trainer.extend(
            extensions.MicroAverage('main/{}'.format(num),
                                    'main/{}'.format(den),
                                    'main/{}'.format(name)))
        trainer.extend(
            extensions.MicroAverage('validation/main/{}'.format(num),
                                    'validation/main/{}'.format(den),
                                    'validation/main/{}'.format(name)))
#        trainer.extend(extensions.MicroAverage(
#            'validation_1/main/{}'.format(num),
#            'validation_1/main/{}'.format(den),
#            'validation_1/main/{}'.format(name)))

    trainer.extend(
        extensions.LogReport(log_name='log_iter', trigger=(1000, 'iteration')))

    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/DER',
            'validation/main/DER', 'elapsed_time'
        ]))
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              x_key='epoch',
                              file_name='loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/DER', 'validation/main/DER'],
                              x_key='epoch',
                              file_name='DER.png'))
    trainer.extend(extensions.ProgressBar(update_interval=100))
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch-{.updater.epoch}'))

    #trainer.extend(extensions.dump_graph('main/loss', out_name="cg.dot"))
    print("Started")
    trainer.run()
    print('Finished!')
Example No. 21
    def test_iterator_repeat_not_even(self):
        dataset = [1, 2, 3, 4, 5]
        it = iterators.MultithreadIterator(dataset, 2, **self.options)

        batches = sum([it.next() for _ in range(5)], [])
        self.assertEqual(sorted(batches), sorted(dataset * 2))
Example No. 22
        h = F.relu(self.l3(h))
        h = self.l4(h)
        return h


# ------ build the dataset ------
x = np.array(input_board, 'float32')
t = np.array(output_stone, 'int32')
# tied to the output of read_file; the two must stay in sync
dataset = TupleDataset(x, t)
ss = len(dataset)
split_at = int(ss * 0.8)
train, test = split_dataset_random(dataset, split_at, seed=0)
#  80% of every game goes to training, the rest to validation
train_iter = iterators.MultithreadIterator(train,
                                           batch_size=100,
                                           repeat=True,
                                           shuffle=True)
#  batch the training data 100 at a time, with shuffling
valid_iter = iterators.MultithreadIterator(test,
                                           batch_size=100,
                                           shuffle=False,
                                           repeat=False)
#  ------end------

net = N()  # build the network (avoid rebinding the class name N)
model = L.Classifier(net)  # Classifier's default loss is F.softmax_cross_entropy
optimizer = optimizers.SGD(lr=0.1)  # plain SGD optimizer
optimizer.setup(model)
updater = training.StandardUpdater(train_iter, optimizer)
#  The updater ties the iterator and optimizer together and runs the
#  boilerplate of the training loop: forward pass, loss, backward pass, and
#  the parameter update (invoking the optimizer), as the tutorial puts it.
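The scrape cuts this example off at the updater. A plausible continuation in the same style (a sketch; the epoch count, output directory, and the extensions import are assumptions, not part of the original):

# assumes: from chainer.training import extensions
trainer = training.Trainer(updater, (20, 'epoch'), out='result')
trainer.extend(extensions.Evaluator(valid_iter, model))  # score the held-out 20%
trainer.extend(extensions.LogReport())
trainer.extend(extensions.PrintReport(
    ['epoch', 'main/loss', 'main/accuracy',
     'validation/main/loss', 'validation/main/accuracy']))
trainer.run()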
Example No. 23
def test_iterator_shuffle_nondivisible(self):
    dataset = list(range(10))
    it = iterators.MultithreadIterator(dataset, 3, **self.options)
    out = sum([it.next() for _ in range(7)], [])
    self.assertNotEqual(out[0:10], out[10:20])
Example No. 24
def test_iterator_shuffle_divisible(self):
    dataset = list(range(10))
    it = iterators.MultithreadIterator(dataset, 10, **self.options)
    self.assertNotEqual(it.next(), it.next())