def _get_hook(hook):
     if hook == 'WeightDecay':
         return WeightDecay(cfg.solver.weight_decay)
     elif hook == 'GradientClipping':
         return GradientClipping(cfg.solver.gradient_clipping_thresh)
     else:
         raise ValueError('Not support `hook`: {}.'.format(hook))
Example #2
0
def create_trainer(train_iter, net, gpu_id, initial_lr, weight_decay,
                   freeze_layer, small_lr_layers, small_initial_lr,
                   num_epochs_or_iter, epoch_or_iter, save_dir):
    """Assemble a ``Trainer`` driving MomentumSGD over ``net``.

    Args:
        train_iter: Iterator handed to the updater.
        net: Link to optimize; moved to the GPU when ``gpu_id >= 0``.
        gpu_id: Device id; also passed to the updater as ``device``.
        initial_lr: Learning rate given to MomentumSGD.
        weight_decay: Weight-decay rate; the hook is added only when > 0.
        freeze_layer: When truthy, forwarded to ``freeze_setup`` together
            with the net and the optimizer.
        small_lr_layers: Names of attributes of ``net.predictor`` whose
            ``W`` and ``b`` get ``small_initial_lr`` as learning rate.
        small_initial_lr: Learning rate for the layers listed above.
        num_epochs_or_iter: Length of the stop trigger.
        epoch_or_iter: Unit of the stop trigger.
        save_dir: Output directory of the trainer.

    Returns:
        The configured ``training.Trainer``.
    """
    if gpu_id >= 0:
        net.to_gpu(gpu_id)

    # Optimizer
    sgd = optimizers.MomentumSGD(lr=initial_lr)
    sgd.setup(net)

    if weight_decay > 0:
        sgd.add_hook(WeightDecay(weight_decay))
    if freeze_layer:
        freeze_setup(net, sgd, freeze_layer)

    # A falsy `small_lr_layers` (None, empty) means no per-layer override.
    for layer_name in small_lr_layers or ():
        layer = getattr(net.predictor, layer_name)
        layer.W.update_rule.hyperparam.lr = small_initial_lr
        layer.b.update_rule.hyperparam.lr = small_initial_lr

    # Trainer
    stop_trigger = (num_epochs_or_iter, epoch_or_iter)
    updater = training.StandardUpdater(train_iter, sgd, device=gpu_id)
    return training.Trainer(updater, stop_trigger, out=save_dir)
Example #3
0
def adadelta(model, args):
    """Create an AdaDelta optimizer for ``model`` with weight decay attached.

    ``args`` must provide ``rho``, ``eps`` and ``weight_decay``.
    """
    optimizer = chainer.optimizers.AdaDelta(rho=args.rho, eps=args.eps)
    optimizer.setup(model)
    decay_hook = WeightDecay(args.weight_decay)
    optimizer.add_hook(decay_hook)
    return optimizer
Example #4
0
def adam(model, args):
    """Create an Adam optimizer for ``model`` with weight decay attached.

    ``args`` must provide ``lr`` (used as Adam's ``alpha``), ``beta1``,
    ``beta2`` and ``weight_decay``.
    """
    hyperparams = dict(alpha=args.lr, beta1=args.beta1, beta2=args.beta2)
    optimizer = chainer.optimizers.Adam(**hyperparams)
    optimizer.setup(model)
    decay_hook = WeightDecay(args.weight_decay)
    optimizer.add_hook(decay_hook)
    return optimizer
Example #5
0
    def from_args(target, args: argparse.Namespace):
        """Build an AdaDelta optimizer from an argparse Namespace.

        Args:
            target: object the optimizer is set up on
                (for chainer `model`, for pytorch `model.parameters()`)
            args (argparse.Namespace): parsed command-line args;
                ``rho``, ``eps`` and ``weight_decay`` are read

        Returns:
            The optimizer, set up on ``target`` with a weight-decay hook.

        """
        optimizer = chainer.optimizers.AdaDelta(rho=args.rho, eps=args.eps)
        optimizer.setup(target)
        decay_hook = WeightDecay(args.weight_decay)
        optimizer.add_hook(decay_hook)
        return optimizer
Example #6
0
    def from_args(target, args: argparse.Namespace):
        """Build an Adam optimizer from an argparse Namespace.

        Args:
            target: object the optimizer is set up on
                (for chainer `model`, for pytorch `model.parameters()`)
            args (argparse.Namespace): parsed command-line args;
                ``lr`` (used as ``alpha``), ``beta1``, ``beta2`` and
                ``weight_decay`` are read

        Returns:
            The optimizer, set up on ``target`` with a weight-decay hook.

        """
        hyperparams = dict(alpha=args.lr, beta1=args.beta1, beta2=args.beta2)
        optimizer = chainer.optimizers.Adam(**hyperparams)
        optimizer.setup(target)
        decay_hook = WeightDecay(args.weight_decay)
        optimizer.add_hook(decay_hook)
        return optimizer
Example #7
0
def sgd(model, args):
    """Create a plain SGD optimizer for ``model`` with weight decay attached.

    ``args`` must provide ``lr`` and ``weight_decay``.
    """
    optimizer = chainer.optimizers.SGD(lr=args.lr)
    optimizer.setup(model)
    decay_hook = WeightDecay(args.weight_decay)
    optimizer.add_hook(decay_hook)
    return optimizer
def main():
    """Train Faster R-CNN FPN on COCO with ChainerMN data parallelism.

    Command-line options:
        --model      Backbone to use, ``resnet50`` or ``resnet101`` (required).
        --batchsize  Batch size summed over all workers (default: 16).
        --out        Directory for logs and snapshots (default: ``result``).
        --resume     Trainer snapshot to restore before training.

    The iteration budget and the learning-rate milestones are written for a
    batch size of 16 and rescaled by ``16 / batchsize``.
    """
    parser = argparse.ArgumentParser()
    # `--model` is mandatory: no branch below builds a model without it, so
    # omitting the flag used to fail later on an unbound `model` variable.
    parser.add_argument(
        '--model', choices=('resnet50', 'resnet101'), required=True)
    parser.add_argument('--batchsize', type=int, default=16)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    comm = chainermn.create_communicator()
    # One GPU per process, selected by the rank inside the node.
    device = comm.intra_rank

    # Build the detector and initialise its backbone from ImageNet weights.
    if args.model == 'resnet50':
        model = FasterRCNNFPNResNet50(
            n_fg_class=len(coco_bbox_label_names), mean='chainercv')
        copyparams(model.extractor.base,
                   ResNet50(pretrained_model='imagenet', arch='he'))
    elif args.model == 'resnet101':
        model = FasterRCNNFPNResNet101(
            n_fg_class=len(coco_bbox_label_names), mean='chainercv')
        copyparams(model.extractor.base,
                   ResNet101(pretrained_model='imagenet', arch='he'))

    model.use_preset('evaluate')
    train_chain = TrainChain(model)
    chainer.cuda.get_device_from_id(device).use()
    train_chain.to_gpu()

    train = TransformDataset(
        ConcatenatedDataset(
            COCOBboxDataset(split='train'),
            COCOBboxDataset(split='valminusminival'),
        ), ('img', 'bbox', 'label'), transform)

    # Only the root rank supplies the index array; scatter_dataset hands
    # every worker its own shuffled share.
    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    # `--batchsize` is the global size, so each worker takes an equal part.
    train_iter = chainer.iterators.MultithreadIterator(
        train, args.batchsize // comm.size)

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(train_chain)
    optimizer.add_hook(WeightDecay(0.0001))

    # Freeze the first backbone stages and every batch-normalization link.
    model.extractor.base.conv1.disable_update()
    model.extractor.base.res2.disable_update()
    for link in model.links():
        if isinstance(link, L.BatchNormalization):
            link.disable_update()

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, converter=converter, device=device)
    trainer = training.Trainer(
        updater, (90000 * 16 / args.batchsize, 'iteration'), args.out)

    def lr_schedule(updater):
        """Return the learning rate for the updater's current iteration."""
        base_lr = 0.02 * args.batchsize / 16
        warm_up_duration = 500
        warm_up_rate = 1 / 3

        iteration = updater.iteration
        if iteration < warm_up_duration:
            # Linear warm-up from `warm_up_rate` to 1.
            rate = warm_up_rate \
                + (1 - warm_up_rate) * iteration / warm_up_duration
        elif iteration < 60000 * 16 / args.batchsize:
            rate = 1
        elif iteration < 80000 * 16 / args.batchsize:
            rate = 0.1
        else:
            rate = 0.01

        return base_lr * rate

    trainer.extend(ManualScheduler('lr', lr_schedule))

    # Reporting and snapshots are registered on the root rank only.
    if comm.rank == 0:
        log_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport(
            ['epoch', 'iteration', 'lr', 'main/loss',
             'main/loss/rpn/loc', 'main/loss/rpn/conf',
             'main/loss/head/loc', 'main/loss/head/conf']),
            trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration'))
        trainer.extend(
            extensions.snapshot_object(
                model, 'model_iter_{.updater.iteration}'),
            trigger=(90000 * 16 / args.batchsize, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer, strict=False)

    trainer.run()
def main():
    """Train SSD300/SSD512 on EPIC-Kitchens with ChainerMN workers.

    Command-line options:
        --model      ``ssd300`` (default) or ``ssd512``.
        --batchsize  Batch size given to the training iterator (default: 32).
        --lr         Initial learning rate (default: 1e-3).
        --out        Directory for logs and snapshots (default: ``result``).
        --resume     Trainer snapshot to restore before training.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model', choices=('ssd300', 'ssd512'), default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    comm = chainermn.create_communicator()
    device = comm.intra_rank

    n_fg_class = len(epic_kitchens_bbox_category_names)
    if args.model == 'ssd300':
        model = SSD300(n_fg_class=n_fg_class, pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=n_fg_class, pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    raw_train = EpicKitchensBboxDataset(year='2018', split='train')
    # Only the root rank supplies the index array that gets scattered.
    indices = np.arange(len(raw_train)) if comm.rank == 0 else None
    mb_transform = Transform(model.coder, model.insize, model.mean)
    train = TransformDataset(
        raw_train, ('img', 'mb_loc', 'mb_label'), mb_transform)

    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    # http://chainermn.readthedocs.io/en/latest/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, n_processes=2)

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(train_chain)
    # Parameters named 'b' get gradient scaling, all others weight decay.
    for param in train_chain.params():
        hook = GradientScaling(2) if param.name == 'b' else WeightDecay(0.0005)
        param.update_rule.add_hook(hook)

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (18, 'epoch'), args.out)

    lr_shift = extensions.ExponentialShift('lr', 0.1, init=args.lr)
    lr_steps = triggers.ManualScheduleTrigger([12, 15], 'epoch')
    trainer.extend(lr_shift, trigger=lr_steps)

    # Reporting and model snapshots are registered on the root rank only.
    if comm.rank == 0:
        log_interval = (10, 'iteration')
        report_keys = [
            'epoch', 'iteration', 'lr',
            'main/loss', 'main/loss/loc', 'main/loss/conf',
        ]
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.PrintReport(report_keys), trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=1))

        model_snapshot = extensions.snapshot_object(
            model, 'model_iter_{.updater.iteration}.npz')
        trainer.extend(model_snapshot, trigger=(1, 'epoch'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Train an SSD detector on the ISIC 2018 task-1 data with Adam.

    Settings come from ``parse_args()``. The best trainer snapshot and the
    best model (by ``validation/main/map``) are saved during training, and
    the final trainer and model are saved once training finishes.
    """
    args = parse_args()
    res = Resource(args, train=True)

    train, test, train_gt, test_gt = load_train_test(
        train_dir=const.PREPROCESSED_TRAIN_DIR, gt_dir=const.XML_DIR)
    res.log_info(f'Train: {len(train)}, test: {len(test)}')

    detector_cls = ARCHS[args.model]
    model = detector_cls(
        n_fg_class=len(const.LABELS), pretrained_model='imagenet')
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Training and test data go through separate Transform instances.
    train_dataset = TransformDataset(
        ISIC2018Task1Dataset(train, train_gt),
        Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.MultithreadIterator(
        train_dataset, args.batchsize, n_threads=args.loaderjob)

    test_dataset = TransformDataset(
        ISIC2018Task1Dataset(test, test_gt),
        Transform(model.coder, model.insize, model.mean))
    test_iter = chainer.iterators.MultithreadIterator(
        test_dataset, args.batchsize,
        shuffle=False, repeat=False, n_threads=args.loaderjob)

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(train_chain)
    # Parameters named 'b' get gradient scaling, all others weight decay.
    for param in train_chain.params():
        hook = GradientScaling(2) if param.name == 'b' else WeightDecay(0.0005)
        param.update_rule.add_hook(hook)

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    evaluator = DetectionVOCEvaluator(
        test_iter, model, use_07_metric=False, label_names=const.LABELS)
    trainer.extend(evaluator)

    report_keys = [
        'epoch', 'iteration', 'main/loss', 'main/loss/loc',
        'main/loss/conf', 'validation/main/map',
    ]
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.observe_lr())
    trainer.extend(extensions.PrintReport(report_keys))
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # Each snapshot extension gets its own trigger object.
    best_trainer_trigger = triggers.MaxValueTrigger(key='validation/main/map')
    best_model_trigger = triggers.MaxValueTrigger(key='validation/main/map')
    trainer.extend(extensions.snapshot(filename='snapshot_best.npz'),
                   trigger=best_trainer_trigger)
    trainer.extend(extensions.snapshot_object(model, 'model_best.npz'),
                   trigger=best_model_trigger)

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()

    # save last model
    last_trainer_path = os.path.join(args.out, 'snapshot_last.npz')
    chainer.serializers.save_npz(last_trainer_path, trainer)
    last_model_path = os.path.join(args.out, 'model_last.npz')
    chainer.serializers.save_npz(last_model_path, model)
Example #11
0
        h = F.relu(self.fc1(x))
        h = F.relu(self.fc2(h))
        h = self.fc3(h)
        return h

net = MLP() # instantiate the network

from chainer import optimizers
from chainer.optimizer_hooks import WeightDecay
# Choose the optimization method
optimizer = optimizers.MomentumSGD(lr=0.001, momentum=0.9)  # learning rate set to 0.001
optimizer.setup(net)

for param in net.params():
    if param.name != 'b':  # anything that is not a bias
        param.update_rule.add_hook(WeightDecay(0.0001))  # apply weight decay

# ------------------------------------------------------------------------------
# Training settings
# Number of epochs (may be changed below)
n_epoch = 401

# Containers for the logged loss / accuracy of the training and validation runs
results_train, results_valid = {}, {}
results_train['loss'], results_train['accuracy'] = [], []
results_valid['loss'], results_valid['accuracy'] = [], []

count = 1

# Fetch one mini-batch and split it into input and target arrays
train_batch = train_iter.next()
x_train, t_train = chainer.dataset.concat_examples(train_batch)
Example #12
0
def do():
    """Train an SSD300 detector configured from the command line.

    Command-line options:
        --model      Parsed, but SSD300 is always built (see note below).
        --batchsize  Mini-batch size (default: 8).
        --iteration  Number of training iterations (default: 64).
        --step       Iterations at which the learning rate is multiplied
                     by 0.1 (default: 8 16).
        --gpu        Device id passed to the updater (default: -1).
        --out        Output directory (default: ``result``).
        --resume     Trainer snapshot to restore before training.

    The datasets come from the module-level ``train_dataset`` and
    ``test_dataset``; the class count comes from ``ssdd.labels``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model', choices=('ssd300', 'ssd512'), default='ssd300')
    parser.add_argument('--batchsize', type=int, default=8)
    parser.add_argument('--iteration', type=int, default=64)
    parser.add_argument('--step', type=int, nargs='*', default=[8, 16])
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    # NOTE(review): `--model` is not consulted here; SSD300 is used even
    # when `ssd512` is requested.
    model = SSD300(
        n_fg_class=len(ssdd.labels),
        pretrained_model='imagenet'
    )
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    # Explicit GPU transfer is currently disabled:
    # if args.gpu >= 0:
    #     chainer.cuda.get_device_from_id(args.gpu).use()
    #     model.to_gpu()
    train = TransformDataset(
        train_dataset,
        Transform(model.coder, model.insize, model.mean),
    )
    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)

    # Only needed by the evaluator, which is disabled below.
    test = test_dataset
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    # Parameters named 'b' get gradient scaling, all others weight decay.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), args.out)
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=1e-3),
        trigger=triggers.ManualScheduleTrigger(args.step, 'iteration')
    )
    # Evaluation is currently disabled:
    # trainer.extend(
    #     extensions.Evaluator(
    #         test_iter, model
    #     ),
    #     trigger=triggers.ManualScheduleTrigger(
    #         args.step + [args.iteration], 'iteration'
    #     )
    # )
    trainer.extend(extensions.ProgressBar(update_interval=1))
    #trainer.extend(extensions.LogReport(trigger=1))
    #trainer.extend(extensions.observe_lr(), trigger=1)
    #trainer.extend(extensions.PrintReport(
    #    ['epoch', 'iteration', 'lr',
    #    'main/loss', 'main/loss/loc', 'main/loss/conf',
    #    'validation/main/map', 'elapsed_time']),
    #    trigger=1)
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss', 'main/loss/loc', 'main/loss/conf'],
                'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['validation/main/map'],
                'epoch', file_name='accuracy.png'))
    # The snapshot fires every 4 iterations, so the file name is keyed on
    # the iteration counter; keying it on the epoch made snapshots taken
    # within the same epoch overwrite each other.
    trainer.extend(
        extensions.snapshot(
            filename='snapshot_iter_{.updater.iteration}.npz'),
        trigger=(4, 'iteration')
    )

    # Honour `--resume`, which used to be parsed and then ignored.
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
def handler(context):
    """Fine-tune a one-class SSD300 on the dataset referenced by ``context``.

    The samples loaded for ``context.datasets['train']`` are shuffled with a
    fixed seed and split 70/30 into training and test parts. A VOC-pretrained
    SSD300 is used to initialise the new one-class model via ``copy_ssd``.

    Args:
        context: Object whose ``datasets`` mapping has a ``'train'`` entry.
    """
    dataset_alias = context.datasets
    samples = list(load_dataset_from_api(dataset_alias['train']))

    # Fixed seed so the split is the same on every run.
    np.random.seed(0)
    samples = np.random.permutation(samples)
    n_total = len(samples)
    n_train = int(7 * n_total // 10)
    train_raw, test_raw = samples[:n_train], samples[n_train:]

    pretrained = SSD300(n_fg_class=20, pretrained_model='voc0712')
    model = SSD300(n_fg_class=1)
    copy_ssd(model, pretrained)

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if USE_GPU >= 0:
        chainer.cuda.get_device_from_id(USE_GPU).use()
        model.to_gpu()

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    # Parameters named 'b' get gradient scaling, all others weight decay.
    for param in train_chain.params():
        hook = GradientScaling(2) if param.name == 'b' else WeightDecay(0.0005)
        param.update_rule.add_hook(hook)

    fix_ssd(train_chain)

    train_data = DetectionDatasetFromAPI(train_raw)
    test_data = DetectionDatasetFromAPI(
        test_raw, use_difficult=True, return_difficult=True)

    train_data = TransformDataset(
        train_data, Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.SerialIterator(train_data, BATCHSIZE)
    test_iter = chainer.iterators.SerialIterator(
        test_data, BATCHSIZE, repeat=False, shuffle=False)

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=USE_GPU)
    trainer = training.Trainer(
        updater, (nb_epochs, 'epoch'), out=ABEJA_TRAINING_RESULT_DIR)

    lr_shift = extensions.ExponentialShift('lr', 0.1, init=1e-3)
    lr_steps = triggers.ManualScheduleTrigger([1200, 1600], 'epoch')
    trainer.extend(lr_shift, trigger=lr_steps)

    evaluator = DetectionVOCEvaluator(
        test_iter, model, use_07_metric=True, label_names=['cup'])
    trainer.extend(evaluator, trigger=(1, 'epoch'))

    log_interval = (1, 'epoch')
    trainer.extend(extensions.LogReport(trigger=log_interval))

    # The printed report omits 'lr'; the statistics/Tensorboard report has it.
    loss_and_map = [
        'main/loss', 'main/loss/loc', 'main/loss/conf',
        'validation/main/map',
    ]
    print_entries = ['epoch'] + loss_and_map
    report_entries = ['epoch', 'lr'] + loss_and_map

    trainer.extend(Statistics(report_entries, nb_epochs), trigger=log_interval)
    trainer.extend(Tensorboard(report_entries, out_dir=log_path))
    trainer.extend(extensions.PrintReport(print_entries), trigger=log_interval)

    final_model = extensions.snapshot_object(
        model, 'model_epoch_{.updater.epoch}')
    trainer.extend(final_model, trigger=(nb_epochs, 'epoch'))

    trainer.run()
Example #14
0
def main():
    """Train SSD300/SSD512 on a jsk_perception detection dataset.

    Command-line options:
        --train-dataset-dir  Training data directory.
        --val-dataset-dir    Validation data directory.
        --dataset-type       ``instance`` (default) or ``bbox``; any other
                             value prints a message and returns.
        --model-name         ``ssd300`` or ``ssd512`` (default).
        --gpu                Device id (default: 0).
        --batch-size         Mini-batch size (default: 8).
        --max-epoch          Number of training epochs (default: 100).
        --out-dir            Output directory; defaults to a timestamped
                             directory under ``<ros home>/learning_logs``.
    """
    rospack = rospkg.RosPack()
    datasets_root = osp.join(
        rospack.get_path('jsk_perception'), 'learning_datasets')

    parser = argparse.ArgumentParser()
    # Dataset directory
    parser.add_argument(
        '--train-dataset-dir', type=str,
        default=osp.join(datasets_root, 'kitchen_dataset', 'train'))
    parser.add_argument(
        '--val-dataset-dir', type=str,
        default=osp.join(datasets_root, 'kitchen_dataset', 'test'))
    parser.add_argument('--dataset-type', type=str, default='instance')
    parser.add_argument(
        '--model-name', choices=('ssd300', 'ssd512'), default='ssd512')
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--batch-size', type=int, default=8)
    parser.add_argument('--max-epoch', type=int, default=100)
    parser.add_argument('--out-dir', type=str, default=None)
    args = parser.parse_args()

    # The same dataset class serves both the training and validation split.
    if args.dataset_type == 'instance':
        dataset_cls = DetectionDataset
    elif args.dataset_type == 'bbox':
        dataset_cls = BboxDetectionDataset
    else:
        print('unsuppported dataset type')
        return
    train_dataset = dataset_cls(args.train_dataset_dir)

    fg_label_names = train_dataset.fg_class_names

    if args.model_name == 'ssd300':
        model = SSD300(
            n_fg_class=len(fg_label_names), pretrained_model='imagenet')
    elif args.model_name == 'ssd512':
        model = SSD512(
            n_fg_class=len(fg_label_names), pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    train = TransformDataset(
        train_dataset, Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.MultiprocessIterator(train, args.batch_size)

    test_dataset = dataset_cls(args.val_dataset_dir)
    test_iter = chainer.iterators.SerialIterator(
        test_dataset, args.batch_size, repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    # Parameters named 'b' get gradient scaling, all others weight decay.
    for param in train_chain.params():
        hook = GradientScaling(2) if param.name == 'b' else WeightDecay(0.0005)
        param.update_rule.add_hook(hook)

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu)

    timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
    out_dir = args.out_dir
    if out_dir is None:
        out_dir = osp.join(rospkg.get_ros_home(), 'learning_logs', timestamp)

    # Learning-rate drops at 2/3 and 5/6 of the run.
    step_epoch = [args.max_epoch * 2 // 3, args.max_epoch * 5 // 6]
    trainer = training.Trainer(updater, (args.max_epoch, 'epoch'), out_dir)

    lr_shift = extensions.ExponentialShift('lr', 0.1, init=1e-3)
    trainer.extend(
        lr_shift, trigger=triggers.ManualScheduleTrigger(step_epoch, 'epoch'))

    evaluator = DetectionVOCEvaluator(
        test_iter, model, use_07_metric=True, label_names=fg_label_names)
    eval_epochs = step_epoch + [args.max_epoch]
    trainer.extend(
        evaluator,
        trigger=triggers.ManualScheduleTrigger(eval_epochs, 'epoch'))

    log_interval = (10, 'iteration')
    report_keys = [
        'epoch', 'iteration', 'lr',
        'main/loss', 'main/loss/loc', 'main/loss/conf',
        'validation/main/map',
    ]
    trainer.extend(
        extensions.LogReport(log_name='log.json', trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport(report_keys), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    final_model = extensions.snapshot_object(model, 'model_snapshot.npz')
    trainer.extend(final_model, trigger=(args.max_epoch, 'epoch'))

    trainer.run()
Example #15
0
def main():
    """Train SSD300 on a VoTT-exported VOC dataset and save the model.

    All settings are hard-coded below. Training runs on GPU 0 for 500 epochs
    with the learning rate multiplied by 0.1 at epochs 200, 300 and 400.
    A trainer snapshot is written every 10 epochs and the final model goes
    to ``my_ssd_model.npz``.
    """
    # Enable cuDNN autotune
    chainer.cuda.set_max_workspace_size(512 * 1024 * 1024)
    chainer.config.autotune = True

    gpu_id = 0
    batchsize = 6
    out_num = 'results'
    log_interval = 1, 'epoch'
    epoch_max = 500
    initial_lr = 0.0001
    lr_decay_rate = 0.1
    lr_decay_timing = [200, 300, 400]

    # Model setup
    model = SSD300(n_fg_class=len(voc_labels), pretrained_model='imagenet')
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)

    # GPU setup
    chainer.cuda.get_device_from_id(gpu_id).use()
    model.to_gpu()

    # Dataset setup
    # Raw string: the Windows path contains backslashes that would otherwise
    # be read as (invalid) escape sequences. The resulting value is unchanged.
    dataset_root = r'C:\Python_Programs\chainer_practice\Telescope_corner'
    train_dataset = MyVoTTVOCDataset(dataset_root, 'train')
    valid_dataset = MyVoTTVOCDataset(dataset_root, 'val')

    # Data augmentation
    transformed_train_dataset = TransformDataset(
        train_dataset, Transform(model.coder, model.insize, model.mean))

    # Iterator setup
    train_iter = chainer.iterators.MultiprocessIterator(
        transformed_train_dataset, batchsize)
    valid_iter = chainer.iterators.SerialIterator(valid_dataset,
                                                  batchsize,
                                                  repeat=False,
                                                  shuffle=False)

    # Optimizer setup: parameters named 'b' get gradient scaling,
    # all others weight decay.
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    # Updater setup
    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=gpu_id)

    # Trainer setup
    trainer = training.Trainer(updater, (epoch_max, 'epoch'), out_num)
    trainer.extend(extensions.ExponentialShift('lr',
                                               lr_decay_rate,
                                               init=initial_lr),
                   trigger=triggers.ManualScheduleTrigger(
                       lr_decay_timing, 'epoch'))
    trainer.extend(DetectionVOCEvaluator(valid_iter,
                                         model,
                                         use_07_metric=False,
                                         label_names=voc_labels),
                   trigger=(1, 'epoch'))
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
        'main/loss/conf', 'validation/main/map', 'elapsed_time'
    ]),
                   trigger=log_interval)

    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss', 'main/loss/loc', 'main/loss/conf'],
                'epoch',
                file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(['validation/main/map'],
                                  'epoch',
                                  file_name='accuracy.png'))
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}.npz'),
        trigger=(10, 'epoch'))

    # To resume interrupted training, load a snapshot into the trainer first:
    # serializers.load_npz('results/snapshot_epoch_100.npz', trainer)

    # Run training
    trainer.run()

    # Save the trained model
    model.to_cpu()
    serializers.save_npz('my_ssd_model.npz', model)
                'loss': loss,
                'loss/loc': loc_loss,
                'loss/conf': conf_loss
            }, self)
        return loss


# Wrap the detector in the training chain used by the optimizer.
train_chain = MultiboxTrainChain(model)

# MomentumSGD with a learning rate of 1e-3.
optimizer = chainer.optimizers.MomentumSGD(1e-3)
optimizer.setup(train_chain)
# Parameters named 'b' get gradient scaling, all others weight decay.
for param in train_chain.params():
    param.update_rule.add_hook(
        GradientScaling(2) if param.name == 'b' else WeightDecay(0.0005))

# Evaluator over `test_iter` using the VOC bounding-box label names.
evaluator = DetectionVOCEvaluator(
    test_iter, model,
    use_07_metric=True, label_names=voc_bbox_label_names)


class save_model(training.Extension):  #ver 1.2
    def __init__(self,
                 model,
                 save_name,
                 before_iter=0,
                 saved_dir='saved_model/',
Example #17
0
def main():
    """Train Mask R-CNN FPN (ResNet50) on YCB-Video data with ChainerMN.

    Command-line options:
        --batchsize  Batch size summed over all workers (default: 16).
        --out        Output directory (default: ``logs``).
        --resume     Trainer snapshot to restore before training.

    Only the root rank builds the datasets; they are then scattered to all
    workers. Reporting and model snapshots are registered on the root rank
    only, while the evaluator runs on every rank.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter, )
    parser.add_argument("--batchsize", type=int, default=16, help="batch size")
    parser.add_argument("--out", default="logs", help="logs")
    parser.add_argument("--resume", help="resume")
    args = parser.parse_args()

    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    # A dummy process is started and joined right after selecting the
    # 'forkserver' start method, before the communicator is created
    # (presumably to launch the fork server early, per the FAQ above).
    if hasattr(multiprocessing, "set_start_method"):
        multiprocessing.set_start_method("forkserver")
        p = multiprocessing.Process()
        p.start()
        p.join()

    comm = chainermn.create_communicator("pure_nccl")
    # One GPU per process, selected by the rank inside the node.
    device = comm.intra_rank

    class_names = morefusion.datasets.ycb_video.class_names
    # The first class name is dropped (presumably the background entry).
    fg_class_names = class_names[1:]
    model = MaskRCNNFPNResNet50(n_fg_class=len(fg_class_names),
                                pretrained_model="imagenet")
    # NOTE(review): _copyparams presumably transfers the COCO-pretrained
    # weights into `model` -- confirm against its definition.
    model_coco = MaskRCNNFPNResNet50(pretrained_model="coco")
    _copyparams(model, model_coco)

    model.use_preset("evaluate")
    train_chain = TrainChain(model)
    chainer.cuda.get_device_from_id(device).use()
    train_chain.to_gpu()

    # Datasets are built on the root rank only and scattered below.
    if comm.rank == 0:
        train = chainer.datasets.ConcatenatedDataset(
            morefusion.datasets.YCBVideoInstanceSegmentationDataset(
                split="train", sampling=15),
            morefusion.datasets.YCBVideoSyntheticInstanceSegmentationDataset(
                bg_composite=True),
            morefusion.datasets.
            MySyntheticYCB20190916InstanceSegmentationDataset(  # NOQA
                "train", bg_composite=True),
        )
        train = transform_dataset(train, model, train=True)
        val = morefusion.datasets.YCBVideoInstanceSegmentationDataset(
            split="keyframe", sampling=1)
        val = transform_dataset(val, model, train=False)
    else:
        train = None
        val = None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    val = chainermn.scatter_dataset(val, comm, shuffle=False)

    # `--batchsize` is the global size: each worker gets an equal share and
    # uses as many loader processes as it has samples per batch.
    train_iter = chainer.iterators.MultiprocessIterator(
        train,
        args.batchsize // comm.size,
        n_processes=args.batchsize // comm.size,
        shared_mem=100 * 1000 * 1000 * 4,
    )
    val_iter = chainer.iterators.MultiprocessIterator(
        val,
        args.batchsize // comm.size,
        n_processes=args.batchsize // comm.size,
        shared_mem=100 * 1000 * 1000 * 4,
        shuffle=False,
        repeat=False,
    )

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(train_chain)
    optimizer.add_hook(WeightDecay(0.0001))

    # Freeze every batch-normalization link, the extractor and the RPN.
    for link in model.links():
        if isinstance(link, L.BatchNormalization):
            link.disable_update()
    model.extractor.disable_update()
    model.rpn.disable_update()

    # Print which links remain trainable.
    for name, link in model.namedlinks():
        print(name, link.update_enabled)

    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                converter=converter,
                                                device=device)
    # NOTE(review): presumably 180k iterations at batch size 8 expressed in
    # epochs of a 118287-sample dataset -- confirm the origin of the numbers.
    max_epoch = (180e3 * 8) / 118287
    trainer = training.Trainer(updater, (max_epoch, "epoch"), args.out)

    @make_shift("lr")
    def lr_schedule(trainer):
        """Return the learning rate for the trainer's current position."""
        base_lr = 0.02 * args.batchsize / 16
        warm_up_duration = 500
        warm_up_rate = 1 / 3

        iteration = trainer.updater.iteration
        if iteration < warm_up_duration:
            # Linear warm-up from `warm_up_rate` to 1.
            rate = (warm_up_rate +
                    (1 - warm_up_rate) * iteration / warm_up_duration)
        else:
            rate = 1
            # Multiply by 0.1 for each milestone (120/180 and 160/180 of
            # the run) that has been reached.
            for step in [120e3 / 180e3 * max_epoch, 160e3 / 180e3 * max_epoch]:
                if trainer.updater.epoch_detail >= step:
                    rate *= 0.1

        return base_lr * rate

    trainer.extend(lr_schedule)

    val_interval = 10000, "iteration"
    evaluator = InstanceSegmentationCOCOEvaluator(val_iter, model)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=val_interval)

    # Reporting and model snapshots are registered on the root rank only.
    if comm.rank == 0:
        log_interval = 10, "iteration"
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        keys = [
            "epoch",
            "iteration",
            "lr",
            "main/loss",
            "main/loss/rpn/loc",
            "main/loss/rpn/conf",
            "main/loss/bbox_head/loc",
            "main/loss/bbox_head/conf",
            "main/loss/mask_head",
            "validation/main/map/iou=0.50:0.95/area=all/max_dets=100",
        ]
        trainer.extend(extensions.PrintReport(keys), trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        # trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration'))
        # Save the model whenever the validation mAP reaches a new maximum,
        # checked at each validation interval.
        trainer.extend(
            extensions.snapshot_object(model, "model_iter_best"),
            trigger=training.triggers.MaxValueTrigger(
                "validation/main/map/iou=0.50:0.95/area=all/max_dets=100",
                trigger=val_interval,
            ),
        )
        # Save the model once more at the end of training.
        trainer.extend(
            extensions.snapshot_object(model,
                                       "model_iter_{.updater.iteration}"),
            trigger=(max_epoch, "epoch"),
        )

    if args.resume:
        serializers.load_npz(args.resume, trainer, strict=False)

    trainer.run()
Example #18
0
def main():
    """Train an SSD detector on PASCAL VOC with ChainerMN and AdaLoss.

    Parses the command-line options, builds an SSD300/SSD512 model wrapped in
    ``AdaLossScaled``, trains it on VOC2007+2012 ``trainval`` across the
    processes of a ``pure_nccl`` communicator and, on rank 0, writes the
    recorded loss-scale, profiling, sanity-check and gradient statistics as
    CSV files into ``args.out``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    # NOTE(review): ``args.np`` is parsed but never referenced below; the
    # train iterator hard-codes ``n_processes=8``. Confirm whether this
    # option was meant to control it.
    parser.add_argument('--np', type=int, default=8)
    parser.add_argument('--test-batchsize', type=int, default=16)
    parser.add_argument('--iteration', type=int, default=120000)
    parser.add_argument('--step', type=int, nargs='*', default=[80000, 100000])
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    parser.add_argument('--dtype',
                        type=str,
                        choices=dtypes.keys(),
                        default='float32',
                        help='Select the data type of the model')
    parser.add_argument('--model-dir',
                        default=None,
                        type=str,
                        help='Where to store models')
    parser.add_argument('--dataset-dir',
                        default=None,
                        type=str,
                        help='Where to store datasets')
    parser.add_argument('--dynamic-interval',
                        default=None,
                        type=int,
                        help='Interval for dynamic loss scaling')
    parser.add_argument('--init-scale',
                        default=1,
                        type=float,
                        help='Initial scale for ada loss')
    parser.add_argument('--loss-scale-method',
                        default='approx_range',
                        type=str,
                        help='Method for adaptive loss scaling')
    parser.add_argument('--scale-upper-bound',
                        default=16,
                        type=float,
                        help='Hard upper bound for each scale factor')
    parser.add_argument('--accum-upper-bound',
                        default=1024,
                        type=float,
                        help='Accumulated upper bound for all scale factors')
    parser.add_argument('--update-per-n-iteration',
                        default=1,
                        type=int,
                        help='Update the loss scale value per n iteration')
    parser.add_argument('--snapshot-per-n-iteration',
                        default=10000,
                        type=int,
                        help='The frequency of taking snapshots')
    parser.add_argument('--n-uf', default=1e-3, type=float)
    parser.add_argument('--nosanity-check', default=False, action='store_true')
    parser.add_argument('--nouse-fp32-update',
                        default=False,
                        action='store_true')
    parser.add_argument('--profiling', default=False, action='store_true')
    parser.add_argument('--verbose',
                        action='store_true',
                        default=False,
                        help='Verbose output')
    args = parser.parse_args()

    # Switch to the 'forkserver' start method and spawn one dummy process
    # before the communicator is created, as recommended by:
    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    comm = chainermn.create_communicator('pure_nccl')
    # One GPU per process: the rank inside the node is used as the device id.
    device = comm.intra_rank

    # Set up workspace
    # 16 GiB (16 * 1024**3 bytes) is passed as the maximum workspace size
    chainer.cuda.set_max_workspace_size(16 * 1024 * 1024 * 1024)
    chainer.global_config.cv_resize_backend = 'cv2'

    # Setup the data type
    # when initializing models as follows, their data types will be casted.
    # We have to forbid the usage of cudnn
    if args.dtype != 'float32':
        chainer.global_config.use_cudnn = 'never'
    chainer.global_config.dtype = dtypes[args.dtype]
    print('==> Setting the data type to {}'.format(args.dtype))

    # The dataset root is pointed at ``--model-dir`` while the model is
    # built (presumably so the pretrained weights are cached there -- TODO
    # confirm); it is re-pointed to ``--dataset-dir`` further below.
    if args.model_dir is not None:
        chainer.dataset.set_dataset_root(args.model_dir)
    if args.model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')

    model.use_preset('evaluate')

    ######################################
    # Setup model
    #######################################
    # Apply ada loss transform
    recorder = AdaLossRecorder(sample_per_n_iter=100)
    profiler = Profiler()
    # The sanity checker is optional; ``None`` is handed to AdaLossScaled
    # when ``--nosanity-check`` is given.
    sanity_checker = SanityChecker(
        check_per_n_iter=100) if not args.nosanity_check else None
    # Update the model to support AdaLoss
    # TODO: refactorize
    model_ = AdaLossScaled(
        model,
        init_scale=args.init_scale,
        cfg={
            'loss_scale_method': args.loss_scale_method,
            'scale_upper_bound': args.scale_upper_bound,
            'accum_upper_bound': args.accum_upper_bound,
            'update_per_n_iteration': args.update_per_n_iteration,
            'recorder': recorder,
            'profiler': profiler,
            'sanity_checker': sanity_checker,
            'n_uf_threshold': args.n_uf,
            # 'power_of_two': False,
        },
        transforms=[
            AdaLossTransformLinear(),
            AdaLossTransformConvolution2D(),
        ],
        verbose=args.verbose)

    if comm.rank == 0:
        print(model)

    # The training chain wraps the AdaLoss-scaled model (``model_``); the
    # plain ``model`` is still used for the coder, evaluation and snapshots.
    train_chain = MultiboxTrainChain(model_, comm=comm)
    chainer.cuda.get_device_from_id(device).use()

    # to GPU
    model.coder.to_gpu()
    model.extractor.to_gpu()
    model.multibox.to_gpu()

    # 400,000,000; passed as ``shared_mem`` to the train iterator below.
    shared_mem = 100 * 1000 * 1000 * 4

    if args.dataset_dir is not None:
        chainer.dataset.set_dataset_root(args.dataset_dir)
    train = TransformDataset(
        ConcatenatedDataset(VOCBboxDataset(year='2007', split='trainval'),
                            VOCBboxDataset(year='2012', split='trainval')),
        ('img', 'mb_loc', 'mb_label'),
        Transform(model.coder,
                  model.insize,
                  model.mean,
                  dtype=dtypes[args.dtype]))

    # Rank 0 builds the full index list; scatter_dataset hands each process
    # its own shuffled share, which is then used to slice the dataset.
    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    # ``--batchsize`` is the global batch size; each process gets an equal
    # (floor-divided) share of it.
    train_iter = chainer.iterators.MultiprocessIterator(train,
                                                        args.batchsize //
                                                        comm.size,
                                                        n_processes=8,
                                                        n_prefetch=2,
                                                        shared_mem=shared_mem)

    if comm.rank == 0:  # NOTE: only performed on the first device
        test = VOCBboxDataset(year='2007',
                              split='test',
                              use_difficult=True,
                              return_difficult=True)
        test_iter = chainer.iterators.SerialIterator(test,
                                                     args.test_batchsize,
                                                     repeat=False,
                                                     shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    # NOTE(review): that only holds when the shift extension below is
    # actually registered -- see the note next to ``args.batchsize != 32``.
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    if args.dtype == 'mixed16':
        if not args.nouse_fp32_update:
            print('==> Using FP32 update for dtype=mixed16')
            optimizer.use_fp32_update()  # by default use fp32 update

        # HACK: support skipping update by existing loss scaling functionality
        if args.dynamic_interval is not None:
            optimizer.loss_scaling(interval=args.dynamic_interval, scale=None)
        else:
            optimizer.loss_scaling(interval=float('inf'), scale=None)
            optimizer._loss_scale_max = 1.0  # to prevent actual loss scaling

    optimizer.setup(train_chain)
    # Per-parameter hooks: parameters named 'b' get their gradient scaled
    # by 2, every other parameter gets weight decay of 0.0005.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=device)
    # if args.dtype == 'mixed16':
    #     updater.loss_scale = 8
    iteration_interval = (args.iteration, 'iteration')

    trainer = training.Trainer(updater, iteration_interval, args.out)
    # trainer.extend(extensions.ExponentialShift('lr', 0.1, init=args.lr),
    #                trigger=triggers.ManualScheduleTrigger(
    #                    args.step, 'iteration'))
    # NOTE(review): the learning-rate schedule is only registered when the
    # batch size differs from 32. With the default ``--batchsize 32`` no
    # shift extension is added, so ``--lr`` and ``--step`` appear to have no
    # effect and the optimizer keeps its constructor default. Confirm that
    # this is intended.
    if args.batchsize != 32:
        warmup_attr_ratio = 0.1
        # NOTE: this is confusing but it means n_iter
        warmup_n_epoch = 1000
        lr_shift = chainerlp.extensions.ExponentialShift(
            'lr',
            0.1,
            init=args.lr * warmup_attr_ratio,
            warmup_attr_ratio=warmup_attr_ratio,
            warmup_n_epoch=warmup_n_epoch,
            schedule=args.step)
        trainer.extend(lr_shift, trigger=(1, 'iteration'))

    # Evaluation, logging and snapshots are registered on rank 0 only.
    if comm.rank == 0:
        if not args.profiling:
            # Evaluate the plain model at every LR step and at the end.
            trainer.extend(DetectionVOCEvaluator(
                test_iter,
                model,
                use_07_metric=True,
                label_names=voc_bbox_label_names),
                           trigger=triggers.ManualScheduleTrigger(
                               args.step + [args.iteration], 'iteration'))

        log_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        # Report the optimizer's private ``_loss_scale`` attribute.
        trainer.extend(extensions.observe_value(
            'loss_scale',
            lambda trainer: trainer.updater.get_optimizer('main')._loss_scale),
                       trigger=log_interval)

        metrics = [
            'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
            'main/loss/conf', 'validation/main/map'
        ]
        # Only print the loss scale when dynamic loss scaling is enabled.
        if args.dynamic_interval is not None:
            metrics.insert(2, 'loss_scale')

        trainer.extend(extensions.PrintReport(metrics), trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        trainer.extend(extensions.snapshot(),
                       trigger=(args.snapshot_per_n_iteration, 'iteration'))
        trainer.extend(extensions.snapshot_object(
            model, 'model_iter_{.updater.iteration}'),
                       trigger=(args.iteration, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    hook = AdaLossMonitor(sample_per_n_iter=100,
                          verbose=args.verbose,
                          includes=['Grad', 'Deconvolution'])
    # Both helpers are given a reference to the trainer before the run.
    recorder.trainer = trainer
    hook.trainer = trainer

    # The monitor hook is entered only on rank 0; other ranks run without it.
    with ExitStack() as stack:
        if comm.rank == 0:
            stack.enter_context(hook)
        trainer.run()

    # store recorded results
    if comm.rank == 0:  # NOTE: only export in the first rank
        recorder.export().to_csv(os.path.join(args.out, 'loss_scale.csv'))
        profiler.export().to_csv(os.path.join(args.out, 'profile.csv'))
        if sanity_checker:
            sanity_checker.export().to_csv(
                os.path.join(args.out, 'sanity_check.csv'))
        hook.export_history().to_csv(os.path.join(args.out, 'grad_stats.csv'))
Example #19
0
def main():
    """Train an image classifier with ChainerMN and adaptive loss scaling.

    Relies on ``args`` and ``_mean`` being defined outside this function.
    Builds the architecture named by ``args.arch``, wraps it in
    ``AdaLossScaled`` and ``L.Classifier``, trains it on the ``train`` /
    ``val`` sub-directories of ``args.dataset_dir`` with a warm-up plus
    step learning-rate schedule and, on rank 0, exports the recorded
    loss-scale and gradient statistics as CSV files into ``args.out``.
    """
    # Start the multiprocessing environment
    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    # Set up workspace
    # 16 GiB (16 * 1024**3 bytes) is passed as the maximum workspace size
    chainer.cuda.set_max_workspace_size(16 * 1024 * 1024 * 1024)

    # Setup the multi-node environment
    comm = chainermn.create_communicator(args.communicator)
    # One GPU per process: the rank inside the node is used as the device id.
    device = comm.intra_rank
    print(
        '==> Successfully setup communicator: "{}" rank: {} device: {} size: {}'
        .format(args.communicator, comm.rank, device, comm.size))
    set_random_seed(args, device)

    # Setup LR
    if args.lr is not None:
        lr = args.lr
    else:
        # ``args.batchsize`` is the per-process batch size (it is passed
        # unchanged to the iterators below), so the product is the global
        # batch size; 0.1 is the rate for a global batch of 256.
        lr = 0.1 * (args.batchsize * comm.size) / 256  # TODO: why?
        if comm.rank == 0:
            print(
                'LR = {} is selected based on the linear scaling rule'.format(
                    lr))

    # Setup dataset
    train_dir = os.path.join(args.dataset_dir, 'train')
    val_dir = os.path.join(args.dataset_dir, 'val')
    label_names = datasets.directory_parsing_label_names(train_dir)
    train_data = datasets.DirectoryParsingLabelDataset(train_dir)
    val_data = datasets.DirectoryParsingLabelDataset(val_dir)
    train_data = TransformDataset(train_data, ('img', 'label'),
                                  TrainTransform(_mean, args))
    val_data = TransformDataset(val_data, ('img', 'label'),
                                ValTransform(_mean, args))
    print('==> [{}] Successfully finished loading dataset'.format(comm.rank))

    # Initializing dataset iterators
    # Rank 0 builds the full index lists; scatter_dataset hands each process
    # its own shuffled share, which is then used to slice the datasets.
    if comm.rank == 0:
        train_indices = np.arange(len(train_data))
        val_indices = np.arange(len(val_data))
    else:
        train_indices = None
        val_indices = None

    train_indices = chainermn.scatter_dataset(train_indices,
                                              comm,
                                              shuffle=True)
    val_indices = chainermn.scatter_dataset(val_indices, comm, shuffle=True)
    train_data = train_data.slice[train_indices]
    val_data = val_data.slice[val_indices]
    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, args.batchsize, n_processes=args.loaderjob)
    val_iter = iterators.MultiprocessIterator(val_data,
                                              args.batchsize,
                                              repeat=False,
                                              shuffle=False,
                                              n_processes=args.loaderjob)

    # Create the model
    kwargs = {}
    # NOTE(review): this branch tests for dtype 'float16' while the
    # optimizer setup below tests for 'mixed16' -- confirm both are intended.
    if args.first_bn_mixed16 and args.dtype == 'float16':
        print('==> Setting the first BN layer to mixed16')
        kwargs['first_bn_mixed16'] = True

    # Initialize the model
    net = models.__dict__[args.arch](n_class=len(label_names), **kwargs)
    # Following https://arxiv.org/pdf/1706.02677.pdf,
    # the gamma of the last BN of each resblock is initialized by zeros.
    for l in net.links():
        if isinstance(l, Bottleneck):
            l.conv3.bn.gamma.data[:] = 0

    # Apply ada loss transform
    recorder = AdaLossRecorder(sample_per_n_iter=100)
    # Update the model to support AdaLoss
    net = AdaLossScaled(net,
                        init_scale=args.init_scale,
                        cfg={
                            'loss_scale_method': args.loss_scale_method,
                            'scale_upper_bound': args.scale_upper_bound,
                            'accum_upper_bound': args.accum_upper_bound,
                            'update_per_n_iteration':
                            args.update_per_n_iteration,
                            'recorder': recorder,
                        },
                        transforms=[
                            AdaLossTransformLinear(),
                            AdaLossTransformBottleneck(),
                            AdaLossTransformBasicBlock(),
                            AdaLossTransformConv2DBNActiv(),
                        ],
                        verbose=args.verbose)

    if comm.rank == 0:  # print network only on the rank-0 process
        print(net)
    net = L.Classifier(net)
    hook = AdaLossMonitor(sample_per_n_iter=100,
                          verbose=args.verbose,
                          includes=['Grad', 'Deconvolution'])

    # Setup optimizer
    optim = chainermn.create_multi_node_optimizer(
        optimizers.CorrectedMomentumSGD(lr=lr, momentum=args.momentum), comm)
    if args.dtype == 'mixed16':
        print('==> Using FP32 update for dtype=mixed16')
        optim.use_fp32_update()  # by default use fp32 update

        # HACK: support skipping update by existing loss scaling functionality
        if args.dynamic_interval is not None:
            optim.loss_scaling(interval=args.dynamic_interval, scale=None)
        else:
            optim.loss_scaling(interval=float('inf'), scale=None)
            optim._loss_scale_max = 1.0  # to prevent actual loss scaling

    optim.setup(net)

    # setup weight decay
    # Parameters named 'beta' or 'gamma' are excluded from weight decay.
    for param in net.params():
        if param.name not in ('beta', 'gamma'):
            param.update_rule.add_hook(WeightDecay(args.weight_decay))

    # allocate model to multiple GPUs
    # NOTE(review): the other scripts in this file call
    # ``get_device_from_id``; confirm whether ``get_device`` is still
    # supported by the Chainer version in use.
    if device >= 0:
        chainer.cuda.get_device(device).use()
        net.to_gpu()

    # Create an updater that implements how to update based on one train_iter input
    updater = chainer.training.StandardUpdater(train_iter,
                                               optim,
                                               device=device)
    # Setup Trainer
    # An explicit iteration budget (``args.iter``) overrides the epoch budget.
    stop_trigger = (args.epoch, 'epoch')
    if args.iter is not None:
        stop_trigger = (args.iter, 'iteration')
    trainer = training.Trainer(updater, stop_trigger, out=args.out)

    @make_shift('lr')
    def warmup_and_exponential_shift(trainer):
        """ LR schedule for training ResNet especially.
        NOTE: lr should be within the context.

        Returns ``rate * lr`` where ``rate`` ramps linearly from
        ``0.1 / lr`` to 1 over the first 5 epochs (only when ``lr > 0.1``)
        and is then 1, 0.1, 0.01 and 0.001 for the epoch ranges
        [5, 30), [30, 60), [60, 80) and [80, ...).
        """
        epoch = trainer.updater.epoch_detail
        warmup_epoch = 5  # NOTE: mentioned the original ResNet paper.
        if epoch < warmup_epoch:
            if lr > 0.1:
                # Start the ramp at an absolute learning rate of 0.1.
                warmup_rate = 0.1 / lr
                rate = warmup_rate \
                    + (1 - warmup_rate) * epoch / warmup_epoch
            else:
                rate = 1
        elif epoch < 30:
            rate = 1
        elif epoch < 60:
            rate = 0.1
        elif epoch < 80:
            rate = 0.01
        else:
            rate = 0.001
        return rate * lr

    trainer.extend(warmup_and_exponential_shift)
    evaluator = chainermn.create_multi_node_evaluator(
        extensions.Evaluator(val_iter, net, device=device), comm)
    trainer.extend(evaluator, trigger=(1, 'epoch'))

    log_interval = 0.1, 'epoch'
    print_interval = 0.1, 'epoch'

    # Reporting and snapshots are registered on rank 0 only.
    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        print('Using {} communicator'.format(args.communicator))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)

        # NOTE: may take snapshot every iteration now
        # The snapshot unit follows the stop trigger's unit, and the unit
        # name is embedded in the file name template.
        snapshot_label = 'epoch' if args.iter is None else 'iteration'
        snapshot_trigger = (args.snapshot_freq, snapshot_label)
        snapshot_filename = ('snapshot_' + snapshot_label + '_{.updater.' +
                             snapshot_label + '}.npz')
        trainer.extend(extensions.snapshot(filename=snapshot_filename),
                       trigger=snapshot_trigger)

        trainer.extend(extensions.LogReport(trigger=log_interval))
        # Report the optimizer's private ``_loss_scale`` attribute.
        trainer.extend(extensions.observe_value(
            'loss_scale',
            lambda trainer: trainer.updater.get_optimizer('main')._loss_scale),
                       trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'iteration', 'epoch', 'elapsed_time', 'lr', 'loss_scale',
            'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy'
        ]),
                       trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    # Both helpers are given a reference to the trainer before the run.
    recorder.trainer = trainer
    hook.trainer = trainer
    # The monitor hook is entered only on rank 0; other ranks run without it.
    with ExitStack() as stack:
        if comm.rank == 0:
            stack.enter_context(hook)
        trainer.run()

    # store recorded results
    if comm.rank == 0:  # NOTE: only export in the first rank
        recorder.export().to_csv(os.path.join(args.out, 'loss_scale.csv'))
        hook.export_history().to_csv(os.path.join(args.out, 'grad_stats.csv'))
Example #20
0
def main():
    """Train the TCC model on the dataset selected by ``CONFIG.dataset``.

    Relies on the module-level ``CONFIG``, ``OPTION`` and ``output_dir``.
    Loads the train/test split for one of the supported datasets
    (``tennis_serve``, ``pouring``, ``multiview_pouring``), builds the model
    and optimizer, registers logging, plotting, evaluation and snapshot
    extensions (all every 100 iterations) and runs the trainer for
    ``CONFIG.iteration`` iterations.

    Raises:
        SystemExit: with status 1 when ``CONFIG.dataset`` is not one of the
            supported names.
    """
    # Imported locally so the block does not depend on a file-level import.
    import sys

    # chainer.config.autotune = True
    # chainer.config.cudnn_fast_batch_normalization = True

    print("dataset", CONFIG.dataset)
    print("output_dir:", output_dir)

    if CONFIG.dataset == "tennis_serve":
        dataset = load_penn_action(dataset_dir=CONFIG.dataset_path,
                                   stride=CONFIG.penn_action.stride,
                                   dict_ok=False)
        # The first 115 entries are used for training, the rest for testing.
        dataset_train = dataset[:115]
        dataset_test = dataset[115:]
    elif CONFIG.dataset == "pouring":
        dataset_train, dataset_test = load_pouring(
            dataset_dir=CONFIG.dataset_path,
            stride=CONFIG.pouring.stride,
            dict_ok=False)
    elif CONFIG.dataset == "multiview_pouring":
        dataset_train, dataset_test = load_multiview_pouring(
            dataset_dir=CONFIG.dataset_path,
            stride=CONFIG.multiview_pouring.stride,
            dict_ok=False)
    else:
        # Report the misconfiguration with a failing exit status. The
        # previous bare ``exit()`` is the ``site`` helper (not guaranteed to
        # exist) and terminated with status 0, i.e. "success".
        print("dataset error.")
        sys.exit(1)

    dataset_train = load_dataset(dataset_train,
                                 augment=None,
                                 img_size=CONFIG.img_size,
                                 k=CONFIG.k)
    dataset_test = load_dataset(dataset_test,
                                augment=None,
                                img_size=CONFIG.img_size,
                                k=CONFIG.k)
    train_iter = MultiprocessIterator(dataset_train,
                                      batch_size=CONFIG.batchsize,
                                      n_processes=6)
    # NOTE(review): ``shuffle=None`` is passed as-is; if this is Chainer's
    # MultiprocessIterator it appears to behave like ``shuffle=True`` when no
    # order sampler is given -- confirm whether evaluation order matters.
    test_iter = MultiprocessIterator(dataset_test,
                                     batch_size=1,
                                     n_processes=6,
                                     repeat=False,
                                     shuffle=None)

    model = tcc(use_bn=True, k=CONFIG.k)
    device = chainer.get_device(OPTION.device)
    device.use()
    model.to_device(device)

    optimizer = make_optimizer(model)

    # Weight decay is attached per parameter and skipped entirely when the
    # configured rate is zero.
    if CONFIG.weight_decay_rate != 0:
        for param in model.params():
            param.update_rule.add_hook(WeightDecay(CONFIG.weight_decay_rate))

    updater = tcc_updater({"main": train_iter}, optimizer, device)

    trainer = Trainer(updater, (CONFIG.iteration, 'iteration'), out=output_dir)

    display_interval = (100, 'iteration')
    plot_interval = (100, 'iteration')
    trainer.extend(extensions.ProgressBar(update_interval=5))
    trainer.extend(
        extensions.LogReport(trigger=display_interval, filename='log.txt'))
    trainer.extend(extensions.PrintReport(
        ["iteration", "main/loss", "test/loss", "test/tau", "elapsed_time"]),
                   trigger=display_interval)

    trainer.extend(extensions.PlotReport(["main/loss", "test/loss"],
                                         "iteration",
                                         file_name="loss.png"),
                   trigger=plot_interval)

    # The evaluator is told the plotting period through its ``epoch``
    # argument and runs on the same trigger.
    trainer.extend(evaluator(test_iter,
                             model,
                             device,
                             epoch=plot_interval[0],
                             out=output_dir),
                   trigger=plot_interval)
    trainer.extend(extensions.PlotReport(["test/tau"],
                                         "iteration",
                                         file_name="tau.png"),
                   trigger=plot_interval)

    # Model weights are saved as "<iteration>.npz" on the plotting trigger.
    trainer.extend(extensions.snapshot_object(model,
                                              "{.updater.iteration}" + ".npz"),
                   trigger=plot_interval)

    trainer.run()
Example #21
0
def main():
    """Train Faster R-CNN FPN on COCO 2017 ``train`` with ChainerMN.

    Parses the command-line options, builds the selected ResNet-50/101 FPN
    detector, distributes the dataset over the communicator's processes and
    trains with MomentumSGD, weight decay and a warm-up plus step
    learning-rate schedule. On rank 0 it also registers logging, periodic
    trainer snapshots and a final model snapshot.

    ``--iteration`` and ``--step`` are expressed for a global batch size of
    16 and are rescaled by ``16 / args.batchsize``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model',
        choices=('faster_rcnn_fpn_resnet50', 'faster_rcnn_fpn_resnet101'),
        default='faster_rcnn_fpn_resnet50')
    parser.add_argument('--batchsize', type=int, default=16)
    parser.add_argument('--iteration', type=int, default=90000)
    parser.add_argument('--step', type=int, nargs='*', default=[60000, 80000])
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    comm = chainermn.create_communicator()
    # One GPU per process: the rank inside the node is used as the device id.
    device = comm.intra_rank

    if args.model == 'faster_rcnn_fpn_resnet50':
        model = FasterRCNNFPNResNet50(
            n_fg_class=len(coco_bbox_label_names), pretrained_model='imagenet')
    elif args.model == 'faster_rcnn_fpn_resnet101':
        model = FasterRCNNFPNResNet101(
            n_fg_class=len(coco_bbox_label_names), pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = TrainChain(model)
    chainer.cuda.get_device_from_id(device).use()
    train_chain.to_gpu()

    train = TransformDataset(
        COCOBboxDataset(year='2017', split='train'),
        ('img', 'bbox', 'label'), transform)

    # Rank 0 builds the full index list; scatter_dataset hands each process
    # its own shuffled share, which is then used to slice the dataset.
    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    # ``--batchsize`` is the global batch size; each process gets an equal
    # (floor-divided) share of it.
    train_iter = chainer.iterators.MultithreadIterator(
        train, args.batchsize // comm.size)

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(train_chain)
    optimizer.add_hook(WeightDecay(0.0001))

    # Freeze the first backbone stages and every batch-normalization link.
    model.extractor.base.conv1.disable_update()
    model.extractor.base.res2.disable_update()
    for link in model.links():
        if isinstance(link, L.BatchNormalization):
            link.disable_update()

    # Total number of iterations, rescaled from the batch-size-16 reference.
    # Shared by the stop trigger and the final model snapshot so the two can
    # never disagree (the snapshot previously hard-coded 90000 and ignored
    # ``--iteration``).
    n_iteration = args.iteration * 16 / args.batchsize

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, converter=converter, device=device)
    trainer = training.Trainer(
        updater, (n_iteration, 'iteration'), args.out)

    @make_shift('lr')
    def lr_schedule(trainer):
        """Return the learning rate for the current iteration.

        The base rate scales linearly with the batch size (0.02 at 16). It
        ramps linearly from one third of the base rate over the first 500
        iterations, then is multiplied by 0.1 for each rescaled ``--step``
        boundary that has been reached.
        """
        base_lr = 0.02 * args.batchsize / 16
        warm_up_duration = 500
        warm_up_rate = 1 / 3

        iteration = trainer.updater.iteration
        if iteration < warm_up_duration:
            rate = warm_up_rate \
                + (1 - warm_up_rate) * iteration / warm_up_duration
        else:
            rate = 1
            for step in args.step:
                if iteration >= step * 16 / args.batchsize:
                    rate *= 0.1

        return base_lr * rate

    trainer.extend(lr_schedule)

    # Reporting and snapshots are registered on rank 0 only.
    if comm.rank == 0:
        log_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport(
            ['epoch', 'iteration', 'lr', 'main/loss',
             'main/loss/rpn/loc', 'main/loss/rpn/conf',
             'main/loss/head/loc', 'main/loss/head/conf']),
            trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration'))
        # Save the final model weights when the stop iteration is reached.
        trainer.extend(
            extensions.snapshot_object(
                model, 'model_iter_{.updater.iteration}'),
            trigger=(n_iteration, 'iteration'))

    if args.resume:
        # ``strict=False`` is passed to load_npz when resuming.
        serializers.load_npz(args.resume, trainer, strict=False)

    trainer.run()
Example #22
0
def train(opt):
    """Run adversarial semi-supervised segmentation training.

    Builds the labeled and unlabeled iterators, the generator
    (``ResNetDeepLab``) and discriminator (``FCN``) with one Adam optimizer
    each, wires them into ``AdvSemiSeg_Updater`` and runs a trainer for
    ``opt.max_epoch`` epochs with logging, loss plots, model snapshots and
    the updater's periodic callbacks.

    Args:
        opt: Options object; every setting used here is read from its
            attributes (``use_cpu``, ``batch_size``, ``g_lr``, ``out_dir``,
            ...).
    """
    # Device id: -1 selects the CPU, 0 the first GPU.
    use_gpu = not opt.use_cpu
    device = 0 if use_gpu else -1
    print('[Message] use GPU0' if use_gpu else '[Message] use CPU')

    annotated = get_dataset(opt)
    unlabeled = get_unlabel_dataset(opt)
    print('[Message] loaded options')

    train_iter = SerialIterator(annotated, opt.batch_size, shuffle=True)
    print('[Message] converted to iterator (train)')
    semi_iter = SerialIterator(unlabeled, opt.batch_size, shuffle=True)
    print('[Message] converted to iterator (semi)')

    # Generator (alternatives tried by the author: DilatedFCN, UNet).
    gen = ResNetDeepLab(opt)
    if use_gpu:
        gen.to_gpu(device)
    g_optim = Adam(alpha=opt.g_lr, beta1=opt.g_beta1, beta2=opt.g_beta2)
    g_optim.setup(gen)
    if opt.g_weight_decay > 0:
        g_optim.add_hook(WeightDecay(opt.g_weight_decay))
    print('[Message] setuped Generator')

    # Discriminator (no weight decay hook).
    dis = FCN(opt)
    if use_gpu:
        dis.to_gpu(device)
    d_optim = Adam(alpha=opt.d_lr, beta1=opt.d_beta1, beta2=opt.d_beta2)
    d_optim.setup(dis)
    print('[Message] setuped Discriminator')

    iterators_by_name = {'main': train_iter, 'semi': semi_iter}
    optimizers_by_name = {'gen': g_optim, 'dis': d_optim}
    updater = AdvSemiSeg_Updater(opt,
                                 iterator=iterators_by_name,
                                 optimizer=optimizers_by_name,
                                 device=device)
    print('[Message] initialized Updater')

    trainer = Trainer(updater, (opt.max_epoch, 'epoch'), out=opt.out_dir)
    print('[Message] initialized Trainer')

    # Chainer training extensions: logging and progress.
    trainer.extend(ex.LogReport(log_name=None, trigger=(1, 'iteration')))
    trainer.extend(ex.ProgressBar((opt.max_epoch, 'epoch'), update_interval=1))

    # Loss plots, registered in this order:
    # (reported keys, x axis key, output file, trigger).
    every_100_iterations = (100, 'iteration')
    plot_specs = (
        (['gen/adv_loss', 'dis/adv_loss', 'gen/semi_adv_loss'],
         'iteration', 'adversarial_loss.png', every_100_iterations),
        (['gen/adv_loss'],
         'iteration', 'adv_gen_loss.png', every_100_iterations),
        (['gen/ce_loss'],
         'iteration', 'cross_entropy_loss.png', every_100_iterations),
        (['gen/semi_st_loss'],
         'iteration', 'self_teach_loss.png', every_100_iterations),
        (['gen/loss', 'dis/loss', 'gen/semi_loss'],
         'iteration', 'loss.png', every_100_iterations),
        (['gen/loss', 'dis/loss', 'gen/semi_loss'],
         'epoch', 'loss_details.png', (5, 'epoch')),
        (['gen/semi_loss'],
         'epoch', 'semi_loss.png', (1, 'epoch')),
    )
    for y_keys, x_key, file_name, plot_trigger in plot_specs:
        trainer.extend(
            ex.PlotReport(y_keys,
                          x_key=x_key,
                          file_name=file_name,
                          trigger=plot_trigger))

    # Model snapshots: generator first, then discriminator.
    snapshot_specs = (
        (gen, 'gen_snapshot_epoch-{.updater.epoch}.npz'),
        (dis, 'dis_snapshot_epoch-{.updater.epoch}.npz'),
    )
    for target, filename in snapshot_specs:
        trainer.extend(ex.snapshot_object(target, filename),
                       trigger=(opt.snap_interval_epoch, 'epoch'))

    # Periodic callbacks; kept as lambdas so they are registered under the
    # same extension names as before.
    trainer.extend(lambda *args: updater.save_img(),
                   trigger=(opt.img_interval_iteration, 'iteration'),
                   priority=PRIORITY_READER)

    trainer.extend(lambda *args: updater.ignition_semi_learning(),
                   trigger=(opt.semi_ignit_iteration, 'iteration'),
                   priority=PRIORITY_READER)

    trainer.extend(lambda *args: adam_lr_poly(opt, trainer),
                   trigger=every_100_iterations)
    print('[Message] initialized extension')

    print('[Message] start training ...')
    trainer.run()
Example #23
0
def main():
  """Train an SSD detector on PASCAL VOC with adaptive loss scaling.

  Parses the command line, wraps the selected SSD model in ``AdaLossScaled``,
  trains it with MomentumSGD for ``--iteration`` iterations and finally
  writes the recorded loss scales, profiling data, sanity-check results (if
  enabled) and gradient statistics as CSV files into ``--out``.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('--model',
                      choices=('ssd300', 'ssd512'),
                      default='ssd300')
  parser.add_argument('--batchsize', type=int, default=32)
  parser.add_argument('--test-batchsize', type=int, default=16)
  parser.add_argument('--iteration', type=int, default=120000)
  parser.add_argument('--step', type=int, nargs='*', default=[80000, 100000])
  parser.add_argument('--gpu', type=int, default=-1)
  parser.add_argument('--out', default='result')
  parser.add_argument('--resume')
  parser.add_argument('--dtype',
                      type=str,
                      choices=dtypes.keys(),
                      default='float32',
                      help='Select the data type of the model')
  parser.add_argument('--model-dir',
                      default=None,
                      type=str,
                      help='Where to store models')
  parser.add_argument('--dataset-dir',
                      default=None,
                      type=str,
                      help='Where to store datasets')
  parser.add_argument('--dynamic-interval',
                      default=None,
                      type=int,
                      help='Interval for dynamic loss scaling')
  parser.add_argument('--init-scale',
                      default=1,
                      type=float,
                      help='Initial scale for ada loss')
  parser.add_argument('--loss-scale-method',
                      default='approx_range',
                      type=str,
                      help='Method for adaptive loss scaling')
  parser.add_argument('--scale-upper-bound',
                      default=32800,
                      type=float,
                      help='Hard upper bound for each scale factor')
  parser.add_argument('--accum-upper-bound',
                      default=32800,
                      type=float,
                      help='Accumulated upper bound for all scale factors')
  parser.add_argument('--update-per-n-iteration',
                      default=100,
                      type=int,
                      help='Update the loss scale value per n iteration')
  parser.add_argument('--snapshot-per-n-iteration',
                      default=10000,
                      type=int,
                      help='The frequency of taking snapshots')
  parser.add_argument('--n-uf', default=1e-3, type=float)
  parser.add_argument('--nosanity-check', default=False, action='store_true')
  parser.add_argument('--nouse-fp32-update',
                      default=False, action='store_true')
  parser.add_argument('--profiling', default=False, action='store_true')
  parser.add_argument('--verbose',
                      action='store_true',
                      default=False,
                      help='Verbose output')
  args = parser.parse_args()

  # Setting data types
  # cuDNN is switched off for every dtype other than float32.
  if args.dtype != 'float32':
    chainer.global_config.use_cudnn = 'never'
  chainer.global_config.dtype = dtypes[args.dtype]
  print('==> Setting the data type to {}'.format(args.dtype))

  # Initialize model (``--model`` is restricted to these two choices)
  if args.model == 'ssd300':
    model = SSD300(n_fg_class=len(voc_bbox_label_names),
                   pretrained_model='imagenet')
  elif args.model == 'ssd512':
    model = SSD512(n_fg_class=len(voc_bbox_label_names),
                   pretrained_model='imagenet')

  model.use_preset('evaluate')

  # Apply adaptive loss scaling
  recorder = AdaLossRecorder(sample_per_n_iter=100)
  profiler = Profiler()
  sanity_checker = (None if args.nosanity_check else
                    SanityChecker(check_per_n_iter=100))
  # Update the model to support AdaLoss
  # TODO: refactorize
  model_ = AdaLossScaled(
      model,
      init_scale=args.init_scale,
      cfg={
          'loss_scale_method': args.loss_scale_method,
          'scale_upper_bound': args.scale_upper_bound,
          'accum_upper_bound': args.accum_upper_bound,
          'update_per_n_iteration': args.update_per_n_iteration,
          'recorder': recorder,
          'profiler': profiler,
          'sanity_checker': sanity_checker,
          'n_uf_threshold': args.n_uf,
      },
      transforms=[
          AdaLossTransformLinear(),
          AdaLossTransformConvolution2D(),
      ],
      verbose=args.verbose)

  # Finalize the model: the training chain wraps the AdaLoss-scaled model,
  # while evaluation and snapshots below keep using the plain ``model``.
  train_chain = MultiboxTrainChain(model_)
  if args.gpu >= 0:
    chainer.cuda.get_device_from_id(args.gpu).use()
    cp.random.seed(0)

    # NOTE: we have to transfer modules explicitly to GPU
    model.coder.to_gpu()
    model.extractor.to_gpu()
    model.multibox.to_gpu()

  # Prepare dataset
  # ``--dataset-dir`` used to be parsed but silently ignored. Honour it and
  # fall back to ``--model-dir`` (the only option the root was read from
  # before) so existing invocations keep working.
  dataset_root = (args.dataset_dir
                  if args.dataset_dir is not None else args.model_dir)
  if dataset_root is not None:
    chainer.dataset.set_dataset_root(dataset_root)
  train = TransformDataset(
      ConcatenatedDataset(VOCBboxDataset(year='2007', split='trainval'),
                          VOCBboxDataset(year='2012', split='trainval')),
      Transform(model.coder, model.insize, model.mean,
                dtype=dtypes[args.dtype]))
  # train_iter = chainer.iterators.MultiprocessIterator(
  #     train, args.batchsize) # , n_processes=8, n_prefetch=2)
  train_iter = chainer.iterators.MultithreadIterator(train, args.batchsize)
  # train_iter = chainer.iterators.SerialIterator(train, args.batchsize)

  test = VOCBboxDataset(year='2007',
                        split='test',
                        use_difficult=True,
                        return_difficult=True)
  test_iter = chainer.iterators.SerialIterator(test,
                                               args.test_batchsize,
                                               repeat=False,
                                               shuffle=False)

  # initial lr is set to 1e-3 by ExponentialShift
  optimizer = chainer.optimizers.MomentumSGD()
  if args.dtype == 'mixed16':
    if not args.nouse_fp32_update:
      print('==> Using FP32 update for dtype=mixed16')
      optimizer.use_fp32_update()  # by default use fp32 update

    # HACK: support skipping update by existing loss scaling functionality
    if args.dynamic_interval is not None:
      optimizer.loss_scaling(interval=args.dynamic_interval, scale=None)
    else:
      optimizer.loss_scaling(interval=float('inf'), scale=None)
      optimizer._loss_scale_max = 1.0  # to prevent actual loss scaling

  optimizer.setup(train_chain)
  # Parameters named 'b' get their gradient scaled by 2; every other
  # parameter gets weight decay instead.
  for param in train_chain.params():
    if param.name == 'b':
      param.update_rule.add_hook(GradientScaling(2))
    else:
      param.update_rule.add_hook(WeightDecay(0.0005))

  updater = training.updaters.StandardUpdater(train_iter,
                                              optimizer,
                                              device=args.gpu)
  trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                             args.out)
  # Multiply the learning rate by 0.1 at every ``--step`` iteration.
  trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                 trigger=triggers.ManualScheduleTrigger(
                     args.step, 'iteration'))

  # Evaluate at every lr step and once more at the final iteration.
  trainer.extend(DetectionVOCEvaluator(test_iter,
                                       model,
                                       use_07_metric=True,
                                       label_names=voc_bbox_label_names),
                 trigger=triggers.ManualScheduleTrigger(
                     args.step + [args.iteration], 'iteration'))

  log_interval = 10, 'iteration'
  trainer.extend(extensions.LogReport(trigger=log_interval))
  trainer.extend(extensions.observe_lr(), trigger=log_interval)
  trainer.extend(extensions.observe_value(
      'loss_scale',
      lambda trainer: trainer.updater.get_optimizer('main')._loss_scale),
      trigger=log_interval)

  metrics = [
      'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
      'main/loss/conf', 'validation/main/map'
  ]
  # The loss scale is only printed when dynamic loss scaling is requested.
  if args.dynamic_interval is not None:
    metrics.insert(2, 'loss_scale')
  trainer.extend(extensions.PrintReport(metrics), trigger=log_interval)
  trainer.extend(extensions.ProgressBar(update_interval=10))

  trainer.extend(extensions.snapshot(),
                 trigger=triggers.ManualScheduleTrigger(
                     args.step + [args.iteration], 'iteration'))
  trainer.extend(extensions.snapshot_object(
      model, 'model_iter_{.updater.iteration}'),
      trigger=(args.iteration, 'iteration'))

  if args.resume:
    serializers.load_npz(args.resume, trainer)

  hook = AdaLossMonitor(sample_per_n_iter=100,
                        verbose=args.verbose,
                        includes=['Grad', 'Deconvolution'])
  # Both helpers are handed the trainer before the run starts.
  recorder.trainer = trainer
  hook.trainer = trainer

  # The monitor is active as a context manager for the whole training run.
  with hook:
    trainer.run()

  # Export everything that was collected during training as CSV.
  recorder.export().to_csv(os.path.join(args.out, 'loss_scale.csv'))
  profiler.export().to_csv(os.path.join(args.out, 'profile.csv'))
  if sanity_checker:
    sanity_checker.export().to_csv(os.path.join(args.out, 'sanity_check.csv'))
  hook.export_history().to_csv(os.path.join(args.out, 'grad_stats.csv'))
Example #24
0
def main():
    """Train a single-class SSD detector on a ``SheepDataset``.

    Positional arguments give the train and test dataset JSON files.  The
    model is trained with Adam for 200 epochs, evaluated every 1000
    iterations, snapshotted every 5000 iterations, and a ``BBOXPlotter``
    extension is run every 10 iterations.  The full run configuration is
    attached to the first log entry.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('dataset', help="path to train json file")
    parser.add_argument('test_dataset', help="path to test dataset json file")
    parser.add_argument(
        '--dataset-root',
        help=
        "path to dataset root if dataset file is not already in root folder of dataset"
    )
    parser.add_argument('--model',
                        choices=('ssd300', 'ssd512'),
                        default='ssd512')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--gpu', type=int, nargs='*', default=[])
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    parser.add_argument('--lr',
                        type=float,
                        default=0.001,
                        help="default learning rate")
    parser.add_argument('--port',
                        type=int,
                        default=1337,
                        help="port for bbox sending")
    parser.add_argument('--ip',
                        default='127.0.0.1',
                        help="destination ip for bbox sending")
    parser.add_argument(
        '--test-image',
        help="path to test image that shall be displayed in bbox vis")
    args = parser.parse_args()

    # Default the dataset root to the folder that holds the train json file.
    if args.dataset_root is None:
        args.dataset_root = os.path.dirname(args.dataset)

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=1, pretrained_model='imagenet')
        image_size = (300, 300)
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=1, pretrained_model='imagenet')
        image_size = (512, 512)
    else:
        raise NotImplementedError("The model you want to train does not exist")

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)

    train = TransformDataset(
        SheepDataset(args.dataset_root, args.dataset, image_size=image_size),
        Transform(model.coder, model.insize, model.mean))

    if len(args.gpu) > 1:
        # One random split per GPU; trim the last split so that it has the
        # same length as the first one.
        gpu_datasets = split_dataset_n_random(train, len(args.gpu))
        if len(gpu_datasets[0]) != len(gpu_datasets[-1]):
            adapted_second_split = split_dataset(gpu_datasets[-1],
                                                 len(gpu_datasets[0]))[0]
            gpu_datasets[-1] = adapted_second_split
    else:
        gpu_datasets = [train]

    train_iter = [
        ThreadIterator(gpu_dataset, args.batchsize)
        for gpu_dataset in gpu_datasets
    ]

    test = SheepDataset(args.dataset_root,
                        args.test_dataset,
                        image_size=image_size)
    test_iter = chainer.iterators.MultithreadIterator(test,
                                                      args.batchsize,
                                                      repeat=False,
                                                      shuffle=False,
                                                      n_threads=2)

    # Adam's alpha is taken directly from --lr; no lr schedule is registered.
    optimizer = chainer.optimizers.Adam(alpha=args.lr)
    optimizer.setup(train_chain)
    # Parameters named 'b' get their gradient scaled by 2; every other
    # parameter gets weight decay instead.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    if len(args.gpu) <= 1:
        updater = training.updaters.StandardUpdater(
            train_iter[0],
            optimizer,
            device=args.gpu[0] if args.gpu else -1,
        )
    else:
        updater = training.updaters.MultiprocessParallelUpdater(
            train_iter, optimizer, devices=args.gpu)
        updater.setup_workers()

    if args.gpu and args.gpu[0] >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu[0]).use()
        model.to_gpu()

    trainer = training.Trainer(updater, (200, 'epoch'), args.out)

    trainer.extend(DetectionVOCEvaluator(test_iter,
                                         model,
                                         use_07_metric=True,
                                         label_names=voc_bbox_label_names),
                   trigger=(1000, 'iteration'))

    # build logger
    # make sure to log all data necessary for prediction
    log_interval = 100, 'iteration'
    data_to_log = {
        'image_size': image_size,
        'model_type': args.model,
    }

    # add all command line arguments
    data_to_log.update(vars(args))

    # create callback that logs all auxiliary data the first time things get logged
    def backup_train_config(stats_cpu):
        # ``log_interval`` is a (period, unit) tuple; compare against the
        # period only.  Comparing the iteration count with the whole tuple
        # was always False, so the configuration was never written.
        if stats_cpu['iteration'] == log_interval[0]:
            stats_cpu.update(data_to_log)

    trainer.extend(
        extensions.LogReport(trigger=log_interval,
                             postprocess=backup_train_config))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
        'main/loss/conf', 'validation/main/map'
    ]),
                   trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
                   trigger=(5000, 'iteration'))

    # Image shown by the bbox plotter: either the user-supplied test image
    # or the first training example with the transform's mean added back.
    if args.test_image is not None:
        plot_image = train._dataset.load_image(args.test_image,
                                               resize_to=image_size)
    else:
        plot_image, _, _ = train.get_example(0)
        plot_image += train._transform.mean

    bbox_plotter = BBOXPlotter(
        plot_image,
        os.path.join(args.out, 'bboxes'),
        send_bboxes=True,
        upstream_port=args.port,
        upstream_ip=args.ip,
    )
    trainer.extend(bbox_plotter, trigger=(10, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
Example #25
0
def main():
    """Train SSD300 or SSD512 on the VOC2007+VOC2012 trainval sets.

    Command-line options select the model, batch size, iteration budget,
    learning-rate step points, GPU id, output directory and an optional
    trainer snapshot to resume from.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--model', choices=('ssd300', 'ssd512'),
                     default='ssd300')
    cli.add_argument('--batchsize', type=int, default=32)
    cli.add_argument('--iteration', type=int, default=120000)
    cli.add_argument('--step', type=int, nargs='*', default=[80000, 100000])
    cli.add_argument('--gpu', type=int, default=-1)
    cli.add_argument('--out', default='result')
    cli.add_argument('--resume')
    opts = cli.parse_args()

    # argparse already restricts --model to the two keys of this table.
    ssd_cls = {'ssd300': SSD300, 'ssd512': SSD512}[opts.model]
    detector = ssd_cls(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    detector.use_preset('evaluate')

    loss_chain = MultiboxTrainChain(detector)
    if opts.gpu >= 0:
        chainer.cuda.get_device_from_id(opts.gpu).use()
        detector.to_gpu()

    # Training data: both trainval splits, passed through the SSD transform.
    voc_trainval = ConcatenatedDataset(
        VOCBboxDataset(year='2007', split='trainval'),
        VOCBboxDataset(year='2012', split='trainval'))
    ssd_transform = Transform(detector.coder, detector.insize, detector.mean)
    train_data = TransformDataset(voc_trainval, ssd_transform)
    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, opts.batchsize)

    # Evaluation data: the VOC2007 test split, visited once and in order.
    test_data = VOCBboxDataset(
        year='2007', split='test', use_difficult=True, return_difficult=True)
    test_iter = chainer.iterators.SerialIterator(
        test_data, opts.batchsize, repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    sgd = chainer.optimizers.MomentumSGD()
    sgd.setup(loss_chain)
    # 'b' parameters get gradient scaling, all the others get weight decay.
    for p in loss_chain.params():
        rule_hook = (GradientScaling(2)
                     if p.name == 'b' else WeightDecay(0.0005))
        p.update_rule.add_hook(rule_hook)

    updater = training.updaters.StandardUpdater(
        train_iter, sgd, device=opts.gpu)
    trainer = training.Trainer(
        updater, (opts.iteration, 'iteration'), opts.out)

    # Learning-rate drops happen exactly at the requested step iterations.
    lr_trigger = triggers.ManualScheduleTrigger(opts.step, 'iteration')
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=lr_trigger)

    # Evaluate at each lr step and once more at the very last iteration.
    evaluator = DetectionVOCEvaluator(
        test_iter, detector,
        use_07_metric=True, label_names=voc_bbox_label_names)
    eval_trigger = triggers.ManualScheduleTrigger(
        opts.step + [opts.iteration], 'iteration')
    trainer.extend(evaluator, trigger=eval_trigger)

    report_every = (10, 'iteration')
    trainer.extend(extensions.LogReport(trigger=report_every))
    trainer.extend(extensions.observe_lr(), trigger=report_every)
    printed_keys = [
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
        'main/loss/conf', 'validation/main/map'
    ]
    trainer.extend(extensions.PrintReport(printed_keys),
                   trigger=report_every)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # Full trainer snapshots follow the evaluation schedule; the bare model
    # is written once, at the final iteration.
    snapshot_trigger = triggers.ManualScheduleTrigger(
        opts.step + [opts.iteration], 'iteration')
    trainer.extend(extensions.snapshot(), trigger=snapshot_trigger)
    model_writer = extensions.snapshot_object(
        detector, 'model_iter_{.updater.iteration}')
    trainer.extend(model_writer, trigger=(opts.iteration, 'iteration'))

    if opts.resume:
        serializers.load_npz(opts.resume, trainer)

    trainer.run()
        optimizer = optimizers.AdaDelta()

    optimizer.setup(net)

    # Regularization: every parameter not named 'b' gets either a Lasso or
    # a WeightDecay hook with coefficient `decay`.
    if args.lasso:
        # Sparsify the weights with Lasso (L1) regularization
        from chainer.optimizer_hooks import Lasso
        for param in net.params():
            if param.name != 'b':
                param.update_rule.add_hook(Lasso(decay))
    else:
        # Suppress overfitting with Ridge (L2) regularization (weight decay)
        from chainer.optimizer_hooks import WeightDecay
        for param in net.params():
            if param.name != 'b':
                param.update_rule.add_hook(WeightDecay(decay))

    # Mini-batch training
    from chainer import Variable
    gx = []
    gy = []
    for i in range(rep):
        # A fresh random permutation of the training indices is drawn on
        # every repetition.
        # NOTE(review): the slice below starts at `i` (the repetition
        # index), not at `i * bs` -- confirm this windowing is intended.
        sffindx = np.random.permutation(train_len)
        x = Variable(
            np.array(train_datas)[sffindx[i:(
                i + bs) if (i + bs) < train_len else train_len]])
        t = Variable(
            np.array(train_labels)[sffindx[i:(
                i + bs) if (i + bs) < train_len else train_len]])
        net.cleargrads()
        loss = net(x, t)
Example #27
0
def handler(context):
    """Train an SSD detector on datasets fetched through the dataset API.

    Args:
        context: object whose ``datasets`` attribute maps the aliases
            ``'trainval2007'``, ``'trainval2012'`` and ``'test2007'`` to
            dataset ids.

    Raises:
        ValueError: if the module-level ``network_model`` is neither
            ``'ssd300'`` nor ``'ssd512'``.

    The model type, GPU id, batch size, iteration count and output/log
    locations are read from module-level names (``network_model``,
    ``USE_GPU``, ``BATCHSIZE``, ``nb_iterations``,
    ``ABEJA_TRAINING_RESULT_DIR``, ``log_path``).
    """
    dataset_alias = context.datasets
    trainval_2007_dataset_id = dataset_alias['trainval2007']
    trainval_2012_dataset_id = dataset_alias['trainval2012']
    test_2007_dataset_id = dataset_alias['test2007']

    # Materialise each dataset listing as a list before wrapping it.
    trainval_2007_dataset = list(
        load_dataset_from_api(trainval_2007_dataset_id))
    trainval_2012_dataset = list(
        load_dataset_from_api(trainval_2012_dataset_id))
    test_2007_dataset = list(load_dataset_from_api(test_2007_dataset_id))

    if network_model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    elif network_model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    else:
        # Without this branch ``model`` would stay unbound and the next
        # statement would fail with an opaque UnboundLocalError.
        raise ValueError(
            'Unsupported network_model: {!r} '
            "(expected 'ssd300' or 'ssd512')".format(network_model))

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if USE_GPU >= 0:
        chainer.cuda.get_device_from_id(USE_GPU).use()
        model.to_gpu()

    trainval_2007 = DetectionDatasetFromAPI(trainval_2007_dataset)
    trainval_2012 = DetectionDatasetFromAPI(trainval_2012_dataset)
    test_2007 = DetectionDatasetFromAPI(test_2007_dataset,
                                        use_difficult=True,
                                        return_difficult=True)

    train = TransformDataset(ConcatenatedDataset(trainval_2007, trainval_2012),
                             Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.SerialIterator(train, BATCHSIZE)

    test_iter = chainer.iterators.SerialIterator(test_2007,
                                                 BATCHSIZE,
                                                 repeat=False,
                                                 shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    # Parameters named 'b' get their gradient scaled by 2; every other
    # parameter gets weight decay instead.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=USE_GPU)
    trainer = training.Trainer(updater, (nb_iterations, 'iteration'),
                               out=ABEJA_TRAINING_RESULT_DIR)
    # Multiply the learning rate by 0.1 at iterations 80000 and 100000.
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger([80000, 100000],
                                                          'iteration'))

    trainer.extend(DetectionVOCEvaluator(test_iter,
                                         model,
                                         use_07_metric=True,
                                         label_names=voc_bbox_label_names),
                   trigger=(10000, 'iteration'))

    log_interval = 100, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)

    # Entries printed to the console vs. entries handed to the
    # Statistics and Tensorboard extensions.
    print_entries = [
        'iteration', 'main/loss', 'main/loss/loc', 'main/loss/conf',
        'validation/main/map'
    ]
    report_entries = [
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
        'main/loss/conf', 'validation/main/map'
    ]

    trainer.extend(Statistics(report_entries,
                              nb_iterations,
                              obs_key='iteration'),
                   trigger=log_interval)
    trainer.extend(Tensorboard(report_entries, out_dir=log_path))
    trainer.extend(extensions.PrintReport(print_entries), trigger=log_interval)

    # The bare model is written once, at the final iteration.
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
                   trigger=(nb_iterations, 'iteration'))

    trainer.run()
Example #28
0
def main():
    """Train a multi-task (detection and/or segmentation) SSD-style model.

    At least one of ``--detection`` / ``--segmentation`` must be given.
    Results are written below ``<out>/<experiment>/<timestamp>``.

    Raises:
        RuntimeError: if neither ``--detection`` nor ``--segmentation`` is
            set.
        ValueError: if evaluation is enabled (``--debug`` not set) and
            ``--dataset`` is neither ``'voc'`` nor ``'coco'``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        choices=('multi_task_300', 'multi_task_512'),
                        default='multi_task_300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--iteration', type=int, default=120000)
    parser.add_argument('--eval_step',
                        type=int,
                        nargs='*',
                        default=[80000, 100000, 120000])
    parser.add_argument('--lr_step',
                        type=int,
                        nargs='*',
                        default=[80000, 100000])
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--snap_step', type=int, default=10000)
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out',
                        default='result')  # in experiments for real experiment
    parser.add_argument('--resume', type=str)
    parser.add_argument('--detection', action='store_true', default=False)
    parser.add_argument('--segmentation', action='store_true', default=False)
    parser.add_argument('--attention', action='store_true', default=False)
    parser.add_argument('--dataset', default='voc', type=str)
    parser.add_argument('--experiment', type=str, default='final_voc')
    parser.add_argument('--multitask_loss', action='store_true', default=False)
    parser.add_argument('--dynamic_loss', action='store_true', default=False)
    parser.add_argument('--log_interval', type=int, default=10)
    parser.add_argument('--debug', action='store_true', default=False)
    parser.add_argument('--update_split_interval', type=int, default=100)
    parser.add_argument(
        '--loss_split', type=float, default=0.5
    )  # in fact for detection, other task(segmentation) is 1-loss_split
    args = parser.parse_args()

    # Expand the snapshot period into the explicit list of iterations
    # (period, 2 * period, ... up to and including --iteration).
    snap_step = args.snap_step
    args.snap_step = list(range(snap_step, args.iteration + 1, snap_step))

    # redefine the output path
    import os
    import time
    args.out = os.path.join(args.out, args.experiment,
                            time.strftime("%Y%m%d_%H%M%S", time.localtime()))

    if args.model == 'multi_task_300':
        model = Multi_task_300(n_fg_class=len(voc_bbox_label_names),
                               pretrained_model='imagenet',
                               detection=args.detection,
                               segmentation=args.segmentation,
                               attention=args.attention)
    elif args.model == 'multi_task_512':
        model = Multi_task_512(n_fg_class=len(voc_bbox_label_names),
                               pretrained_model='imagenet',
                               detection=args.detection,
                               segmentation=args.segmentation,
                               attention=args.attention)

    model.use_preset('evaluate')
    if not (args.segmentation or args.detection):
        raise RuntimeError(
            'at least one of --detection / --segmentation is required')

    train_chain = MultiboxTrainChain(model,
                                     gpu=args.gpu >= 0,
                                     use_multi_task_loss=args.multitask_loss,
                                     loss_split=args.loss_split)
    train_chain.cleargrads()

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    train = TransformDataset(
        Multi_task_VOC(voc_experiments[args.experiment][args.experiment +
                                                        '_train']),
        Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.MultiprocessIterator(
        train, batch_size=args.batchsize)

    test = VOCBboxDataset(year='2007',
                          split='test',
                          use_difficult=True,
                          return_difficult=True)

    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    test_mask = VOCSemanticSegmentationDataset(split='val')
    test_mask_iter = chainer.iterators.SerialIterator(test_mask,
                                                      args.batchsize,
                                                      repeat=False,
                                                      shuffle=False)

    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    # optimizer.add_hook(GradientClipping(0.1))
    # Parameters named 'b' get their gradient scaled by 2; every other
    # parameter gets weight decay instead.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=args.gpu)
    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               args.out)
    # Multiply the learning rate by 0.1 at every --lr_step iteration.
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=args.lr),
                   trigger=triggers.ManualScheduleTrigger(
                       args.lr_step, 'iteration'))

    # ``use_07`` used to be assigned for 'voc' only, so ``--dataset coco
    # --detection`` crashed on an unbound name; it is now always defined.
    # NOTE(review): False is assumed to be the right value for COCO --
    # confirm against MultitaskEvaluator.
    use_07 = args.dataset == 'voc'
    if args.dataset == 'voc':
        label_names = voc_bbox_label_names
    elif args.dataset == 'coco':
        label_names = coco_bbox_label_names
    else:
        label_names = None

    # Evaluators are only registered outside of debug mode; fail early and
    # clearly if they would be built without label names.
    if not args.debug and label_names is None:
        raise ValueError(
            "Unsupported dataset: {!r} (expected 'voc' or 'coco')".format(
                args.dataset))

    if args.detection and not args.debug:
        trainer.extend(MultitaskEvaluator(test_iter,
                                          model,
                                          args.dataset,
                                          use_07,
                                          label_names=label_names),
                       trigger=triggers.ManualScheduleTrigger(
                           args.eval_step + [args.iteration], 'iteration'))

    if args.segmentation and not args.debug:
        trainer.extend(MultitaskEvaluator(test_mask_iter,
                                          model,
                                          dataset=args.dataset,
                                          label_names=label_names,
                                          detection=False),
                       trigger=triggers.ManualScheduleTrigger(
                           args.eval_step + [args.iteration], 'iteration'))

    log_interval = args.log_interval, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    # The loss-split extension is only added when both tasks are trained
    # and --dynamic_loss is set.
    if args.segmentation and args.detection and args.dynamic_loss:
        trainer.extend(
            loss_split.LossSplit(trigger=(args.update_split_interval,
                                          'iteration')))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/mask',
        'main/loss/loc', 'main/loss/conf', 'main/loss/split'
    ]),
                   trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # Trainer and bare-model snapshots share the same schedule.
    trainer.extend(extensions.snapshot(),
                   trigger=triggers.ManualScheduleTrigger(
                       args.snap_step + [args.iteration], 'iteration'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
                   trigger=triggers.ManualScheduleTrigger(
                       args.snap_step + [args.iteration], 'iteration'))
    if args.resume:
        # A path containing 'model' is loaded into the model only; any
        # other path is loaded into the whole trainer.
        if 'model' in args.resume:
            serializers.load_npz(args.resume, model)
        else:
            serializers.load_npz(args.resume, trainer)

    print(args)

    trainer.run()
Example #29
0
def main():
    """Train an SSD detector on PASCAL VOC with ChainerMN workers.

    Parses the command line, builds the selected SSD model, scatters the
    VOC2007 + VOC2012 trainval data across workers and runs the trainer.
    The test iterator and the evaluation / reporting / snapshot
    extensions are only created on the worker whose rank is 0.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--model', choices=('ssd300', 'ssd512'), default='ssd300')
    cli.add_argument('--batchsize', type=int, default=32)
    cli.add_argument('--test-batchsize', type=int, default=16)
    cli.add_argument('--iteration', type=int, default=120000)
    cli.add_argument(
        '--step', type=int, nargs='*', default=[80000, 100000])
    cli.add_argument('--out', default='result')
    cli.add_argument('--resume')
    args = cli.parse_args()

    comm = chainermn.create_communicator()
    device = comm.intra_rank

    # argparse `choices` restricts --model to exactly these two values.
    detector_cls = SSD300 if args.model == 'ssd300' else SSD512
    model = detector_cls(n_fg_class=len(voc_bbox_label_names),
                         pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    trainval = ConcatenatedDataset(
        VOCBboxDataset(year='2007', split='trainval'),
        VOCBboxDataset(year='2012', split='trainval'))
    train = TransformDataset(
        trainval,
        ('img', 'mb_loc', 'mb_label'),
        Transform(model.coder, model.insize, model.mean))

    # Only rank 0 provides the full index array; every worker then keeps
    # the slice returned to it by scatter_dataset.
    all_indices = np.arange(len(train)) if comm.rank == 0 else None
    local_indices = chainermn.scatter_dataset(
        all_indices, comm, shuffle=True)
    train = train.slice[local_indices]

    # http://chainermn.readthedocs.io/en/latest/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
    # The global batch size is divided evenly among the workers.
    per_worker_batch = args.batchsize // comm.size
    train_iter = chainer.iterators.MultiprocessIterator(
        train, per_worker_batch, n_processes=2)

    if comm.rank == 0:
        test = VOCBboxDataset(
            year='2007',
            split='test',
            use_difficult=True,
            return_difficult=True)
        test_iter = chainer.iterators.SerialIterator(
            test, args.test_batchsize, repeat=False, shuffle=False)

    # The learning rate is not set here: ExponentialShift (registered
    # below) initialises it to 1e-3.
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(train_chain)
    for p in train_chain.params():
        # Parameters named 'b' get gradient scaling, all others get
        # weight decay.
        hook = GradientScaling(2) if p.name == 'b' else WeightDecay(0.0005)
        p.update_rule.add_hook(hook)

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=device)
    trainer = training.Trainer(
        updater, stop_trigger=(args.iteration, 'iteration'), out=args.out)
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=1e-3),
        trigger=triggers.ManualScheduleTrigger(args.step, 'iteration'))

    if comm.rank == 0:

        def schedule_trigger():
            # A fresh trigger object per extension: each LR step plus
            # the final iteration.
            return triggers.ManualScheduleTrigger(
                args.step + [args.iteration], 'iteration')

        evaluator = DetectionVOCEvaluator(
            test_iter,
            model,
            use_07_metric=True,
            label_names=voc_bbox_label_names)
        trainer.extend(evaluator, trigger=schedule_trigger())

        report_trigger = (10, 'iteration')
        report_keys = [
            'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
            'main/loss/conf', 'validation/main/map'
        ]
        trainer.extend(extensions.LogReport(trigger=report_trigger))
        trainer.extend(extensions.observe_lr(), trigger=report_trigger)
        trainer.extend(extensions.PrintReport(report_keys),
                       trigger=report_trigger)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        trainer.extend(extensions.snapshot(), trigger=schedule_trigger())
        # The bare model is written once, at the final iteration.
        trainer.extend(
            extensions.snapshot_object(
                model, 'model_iter_{.updater.iteration}'),
            trigger=(args.iteration, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
Example #30
0
def main():
    """Train the ECFP + node2vec CNN on five cross-validation folds.

    For each fold ``cv_0`` .. ``cv_4`` under ``args.input`` this loads the
    fingerprint, interaction label, chemical ID, protein ID and protein
    sequence files, looks up node2vec embeddings for every chemical and
    protein, splits the samples 80/20 into training and validation
    subsets, and trains a fresh ``MV.CNN`` with MomentumSGD for
    ``args.epoch`` epochs. Logs, plots and model snapshots are written to
    ``<args.output>/ecfpN2vc_mSGD/pattern<i>``.

    Raises:
        FileExistsError: if a fold's output directory already exists.
    """
    START = time.time()

    import argparse as arg
    parser = arg.ArgumentParser(description='ecfpWD_n2v')
    parser.add_argument('--gpu', '-g', type=int, default=1, help='GPU ID')
    parser.add_argument('--batchsize', '-b', type=int, default=100, help='minibatch')
    parser.add_argument('--epoch', '-e', type=int, default=150, help='number of max iteration to evaluate')
    parser.add_argument('--s1', type=int, default=1)
    parser.add_argument('--sa1', type=int, default=1)
    parser.add_argument('--s2', type=int, default=1)
    parser.add_argument('--sa2', type=int, default=1)
    parser.add_argument('--s3', type=int, default=1)
    parser.add_argument('--sa3', type=int, default=1)
    parser.add_argument('--j1', type=int, default=33)
    parser.add_argument('--pf1', type=int, default=64)
    parser.add_argument('--ja1', type=int, default=17)
    parser.add_argument('--j2', type=int, default=23)
    parser.add_argument('--pf2', type=int, default=64)
    parser.add_argument('--ja2', type=int, default=11)
    parser.add_argument('--j3', type=int, default=33)
    parser.add_argument('--pf3', type=int, default=32)
    parser.add_argument('--ja3', type=int, default=17)
    parser.add_argument('--n_hid3', type=int, default=70)
    parser.add_argument('--n_hid4', type=int, default=80)
    parser.add_argument('--n_hid5', type=int, default=60)
    parser.add_argument('--n_out', type=int, default=1)
    parser.add_argument('--prosize', type=int, default=5762)
    parser.add_argument('--input', '-i', default='./dataset/hard_dataset')
    parser.add_argument('--output', '-o', default='./result/hard_dataset')
    parser.add_argument('--frequency', type=int, default=1)
    # NOTE(review): an explicit empty argument list means the real command
    # line is ignored and every option keeps its default. Presumably this
    # is for notebook use -- confirm before switching to parse_args().
    args = parser.parse_args(args=[])

    print('GPU: ', args.gpu)
    print('# Minibatch-size: ', args.batchsize)
    print('')

    #-------------------------------
    # GPU check
    # Data is always loaded with NumPy; `xp` is only an alias for it.
    xp = np
    if args.gpu >= 0:
        print('GPU mode')

    #-------------------------------
    # Loading SMILEs
    for i in range(5):
        fold_dir = os.path.join(args.input, 'cv_' + str(i))

        print('Making Training dataset...')
        ecfp = xp.load(os.path.join(fold_dir, 'train_fingerprint.npy'))
        ecfp = xp.asarray(ecfp, dtype='float32').reshape(-1, 1024)

        file_interactions = xp.load(
            os.path.join(fold_dir, 'train_interaction.npy'))
        print('Loading labels: train_interaction.npy')
        cID = xp.load(os.path.join(fold_dir, 'train_chemIDs.npy'))
        print('Loading chemIDs: train_chemIDs.npy')
        with open(os.path.join(fold_dir, 'train_proIDs.txt')) as f:
            pID = [s.strip() for s in f]
        print('Loading proIDs: train_proIDs.txt')

        # SECURITY: pickle.load executes arbitrary code from the file;
        # these model files must come from a trusted source.
        with open('./data_multi/modelpp.pickle', mode='rb') as f:
            modelpp = pickle.load(f)
        with open('./data_multi/modelcc.pickle', mode='rb') as f:
            modelcc = pickle.load(f)
        # Embedding lookup: chemical IDs are converted to str keys,
        # protein IDs are used as read from the text file.
        n2v_c = [modelcc.wv[str(j)] for j in cID]
        n2v_p = [modelpp.wv[k] for k in pID]
        interactions = xp.asarray(
            file_interactions, dtype='int32').reshape(-1, args.n_out)
        n2vc = np.asarray(n2v_c, dtype='float32').reshape(-1, 128)
        n2vp = np.asarray(n2v_p, dtype='float32').reshape(-1, 128)
        # Release the raw inputs before the sequence array is loaded.
        del n2v_c, n2v_p, cID, pID, modelcc, modelpp, file_interactions
        gc.collect()

        file_sequences = xp.load(
            os.path.join(fold_dir, 'train_reprotein.npy'))
        print('Loading sequences: train_reprotein.npy', flush=True)
        # `plensize` is not defined in this function; it is expected to
        # be a module-level name.
        sequences = xp.asarray(file_sequences, dtype='float32').reshape(
            -1, 1, args.prosize, plensize)
        # Release the unshaped copy.
        del file_sequences
        gc.collect()

        print(interactions.shape, ecfp.shape, sequences.shape, n2vc.shape, n2vp.shape, flush=True)

        print('Now concatenating...', flush=True)
        train_dataset = datasets.TupleDataset(
            ecfp, sequences, n2vc, n2vp, interactions)
        # The first 80% of the samples train the model, the rest validate.
        n = int(0.8 * len(train_dataset))
        train_dataset, valid_dataset = train_dataset[:n], train_dataset[n:]
        print('train: ', len(train_dataset), flush=True)
        print('valid: ', len(valid_dataset), flush=True)

        print('pattern: ', i, flush=True)
        output_dir = os.path.join(
            args.output, 'ecfpN2vc_mSGD', 'pattern' + str(i))
        # No exist_ok: this raises if the directory is already there, so
        # an earlier run's directory is never silently reused.
        os.makedirs(output_dir)

        #-------------------------------
        # Release the source arrays now that the datasets are built.
        del n, sequences, interactions, ecfp, n2vc, n2vp
        gc.collect()

        #-------------------------------
        # Set up a neural network to train
        print('Set up a neural network to train', flush=True)
        model = MV.CNN(
            args.prosize, plensize, args.batchsize,
            args.s1, args.sa1, args.s2, args.sa2, args.s3, args.sa3,
            args.j1, args.pf1, args.ja1,
            args.j2, args.pf2, args.ja2,
            args.j3, args.pf3, args.ja3,
            args.n_hid3, args.n_hid4, args.n_hid5, args.n_out)
        #-------------------------------
        # Make a specified GPU current
        if args.gpu >= 0:
            chainer.cuda.get_device_from_id(args.gpu).use()
            model.to_gpu()  # Copy the model to the GPU
        #-------------------------------
        # Setup an optimizer
        optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
        optimizer.setup(model)
        #-------------------------------
        # L2 regularization (weight decay)
        for param in model.params():
            if param.name != 'b':  # every parameter except biases
                param.update_rule.add_hook(WeightDecay(0.00001))  # apply weight decay
        #-------------------------------
        # Set up a trainer
        print('Trainer is setting up...', flush=True)

        train_iter = chainer.iterators.SerialIterator(
            train_dataset, batch_size=args.batchsize, shuffle=True)
        test_iter = chainer.iterators.SerialIterator(
            valid_dataset, batch_size=args.batchsize, repeat=False,
            shuffle=True)
        updater = training.StandardUpdater(
            train_iter, optimizer, device=args.gpu)
        trainer = training.Trainer(
            updater, (args.epoch, 'epoch'), out=output_dir)
        # Evaluate the model on the held-out validation split
        trainer.extend(
            extensions.Evaluator(test_iter, model, device=args.gpu))
        # Take a snapshot of the model every `args.frequency` epochs
        trainer.extend(
            extensions.snapshot_object(
                model, 'model_snapshot_{.updater.epoch}'),
            trigger=(args.frequency, 'epoch'))
        # Write one log per epoch and one per 10 iterations
        trainer.extend(extensions.LogReport(
            trigger=(1, 'epoch'), log_name='log_epoch'))
        trainer.extend(extensions.LogReport(
            trigger=(10, 'iteration'), log_name='log_iteration'))
        # Print selected entries of the log to stdout
        trainer.extend(extensions.PrintReport(
            ['epoch', 'elapsed_time', 'main/loss', 'validation/main/loss',
             'main/accuracy', 'validation/main/accuracy']))
        # Plot loss and accuracy curves into the output directory
        trainer.extend(extensions.PlotReport(
            ['main/loss', 'validation/main/loss'],
            x_key='epoch', file_name='loss.png'))
        trainer.extend(extensions.PlotReport(
            ['main/accuracy', 'validation/main/accuracy'],
            x_key='epoch', file_name='accuracy.png'))
        # Print a progress bar to stdout
        trainer.extend(extensions.ProgressBar())

        # Run the training
        trainer.run()

        # Elapsed time is measured from the start of main(), so it is
        # cumulative across folds.
        END = time.time()
        print('Nice, your Learning Job is done. Total time is {} sec.'.format(END-START))

        # Drop this fold's training objects before the next fold loads.
        del model, train_iter, test_iter, updater, trainer
        gc.collect()