def _prepare_multinode_snapshot(n, result):
    n_units = 100
    batchsize = 10
    comm = create_communicator('naive')
    model = L.Classifier(MLP(n_units, 10))
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), comm)
    optimizer.setup(model)

    if comm.rank == 0:
        train, _ = chainer.datasets.get_mnist()
    else:
        train, _ = None, None

    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    train_iter = chainer.iterators.SerialIterator(train, batchsize)

    updater = StandardUpdater(train_iter, optimizer)
    trainer = Trainer(updater, out=result)

    snapshot = extensions.snapshot(target=updater, autoload=True)
    replica_sets = []
    mn_snapshot = multi_node_snapshot(comm, snapshot, replica_sets)
    mn_snapshot.initialize(trainer)
    for _ in range(n):
        updater.update()

    return updater, mn_snapshot, trainer
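# A minimal usage sketch for the helper above, not part of the original code.
# It assumes an MPI launch and an output directory visible to every rank:
# advance a few iterations, write a snapshot by invoking the extension
# directly (Chainer extensions are callable with the trainer), then rebuild
# with the same output directory so autoload restores the updater state.
def _sketch_snapshot_roundtrip(result_dir):
    updater, mn_snapshot, trainer = _prepare_multinode_snapshot(5, result_dir)
    mn_snapshot(trainer)  # writes the snapshot for this replica set

    updater2, _, _ = _prepare_multinode_snapshot(0, result_dir)
    assert updater2.iteration == 5  # expected to resume from the saved state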
Example 2
def train(args):
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    logger.addHandler(logging.StreamHandler())
    rangelog.set_logger(logger)
    rangelog.set_start_msg("start... {name}")
    rangelog.set_end_msg("  end...")

    with rangelog("creating dataset") as logger:
        train_set, eval_set = get_cifar10()
        if args.sample_pairing:
            train_set = SamplePairingDataset(train_set)

    with rangelog("creating iterator") as logger:
        logger.info("train_set: {}, eval_set: {}".format(
            len(train_set), len(eval_set)))
        iterator = SerialIterator(train_set, args.batch, repeat=True)
        eval_iterator = SerialIterator(eval_set, args.batch, repeat=False)

    with rangelog("creating model") as logger:
        logger.info('GPU: {}'.format(args.device))
        model = Conv(10)
        chainer.cuda.get_device_from_id(args.device).use()
        model.to_gpu(args.device)

    with rangelog("creating optimizer"):
        optimizer = optimizers.Adam()
        optimizer.setup(model)

    with rangelog("creating trainer"):
        updater = StandardUpdater(
            iterator=iterator, optimizer=optimizer, device=args.device)
        trainer = training.Trainer(
            updater, (args.epoch, 'epoch'), out=args.store)

    with rangelog("trainer extension") as logger:
        trainer.extend(
            extensions.Evaluator(
                iterator=eval_iterator, target=model, device=args.device))
        trainer.extend(extensions.LogReport())
        trainer.extend(SourceBackup())
        trainer.extend(ArgumentBackup(args))
        try:
            slack = json.load(open("slack.json"))
        except Exception as e:
            logger.warning("Error {}".format(e))
        else:
            trainer.extend(SlackPost(slack["token"], slack["channel"]))
        trainer.extend(extensions.PrintReport(['epoch'] + args.report_keys))
        trainer.extend(extensions.ProgressBar(update_interval=1))
        trainer.extend(
            extensions.PlotReport(
                args.report_keys, 'epoch', file_name='plot.png'))
        trigger = MinValueTrigger(key='validation/main/loss')
        snapshoter = snapshot_object(model, filename=args.model_path)
        trainer.extend(snapshoter, trigger=trigger)

    with rangelog("training"):
        trainer.run()
    return model
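# A possible command-line entry point for train() above; the argument names
# are inferred from how args is used in the function, and the defaults are
# illustrative guesses rather than the author's settings.
import argparse

def parse_args():
    parser = argparse.ArgumentParser(description='CIFAR-10 training')
    parser.add_argument('--batch', type=int, default=64)
    parser.add_argument('--epoch', type=int, default=100)
    parser.add_argument('--device', type=int, default=0)        # GPU id
    parser.add_argument('--store', default='result')            # Trainer output dir
    parser.add_argument('--model_path', default='model.npz')    # best-model snapshot name
    parser.add_argument('--sample_pairing', action='store_true')
    parser.add_argument('--report_keys', nargs='*',
                        default=['main/loss', 'validation/main/loss',
                                 'main/accuracy', 'validation/main/accuracy'])
    return parser.parse_args()

if __name__ == '__main__':
    train(parse_args())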
Example 3
    def optimize(self, M: np.ndarray, X: np.ndarray):
        """
        Find the optimal affine transformation which minimizes the loss defined
        in :py:func:`AffineTransformation.get_loss_func`.

        Args:
            M: Stacked motion matrix of shape (2 * n_views, 3)
            X: 3D point cloud of shape (n_points, 3)
        """
        data_iter = iterators.SerialIterator(MotionMatrices(M), self.batchsize)
        object_iter = iterators.SerialIterator(Objects(X), 1, repeat=False)

        optimizer = optimizers.MomentumSGD(lr=self.learning_rate)
        optimizer.setup(self.model)
        updater = StandardUpdater(data_iter,
                                  optimizer,
                                  loss_func=self.model.get_loss_func())

        log_interval = (1, 'epoch')

        trainer = chainer.training.Trainer(updater, (self.epoch, 'epoch'))

        if self.X_eval is not None:
            trainer.extend(extensions.Evaluator(
                object_iter,
                self.model,
                eval_func=self.get_recornstruction_error_func()),
                           trigger=(1, 'epoch'))

        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.PrintReport(
            ['epoch', 'iteration', 'main/loss', 'reconstruction_error']),
                       trigger=log_interval)

        trainer.run()
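# Self-contained sketch of the StandardUpdater(loss_func=...) pattern used
# above, with a toy linear model standing in for the affine-transformation
# link; everything here is illustrative, not the author's model.
import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import iterators, optimizers, training
from chainer.training import StandardUpdater

class ToyModel(chainer.Chain):
    def __init__(self):
        super().__init__()
        with self.init_scope():
            self.l1 = L.Linear(3, 1)

    def get_loss_func(self):
        # StandardUpdater calls loss_func(*batch) after the converter.
        def loss(x, t):
            return F.mean_squared_error(self.l1(x), t)
        return loss

x = np.random.rand(100, 3).astype(np.float32)
t = x.sum(axis=1, keepdims=True)
data_iter = iterators.SerialIterator(chainer.datasets.TupleDataset(x, t), 10)

model = ToyModel()
optimizer = optimizers.MomentumSGD(lr=0.01)
optimizer.setup(model)
updater = StandardUpdater(data_iter, optimizer, loss_func=model.get_loss_func())
training.Trainer(updater, (5, 'epoch')).run()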
Example 4
def main():
    train_x, train_y, val_x, val_y = load_pascal_voc_dataset(DATASET_ROOT)
    train_dataset = YoloDataset(train_x,
                                train_y,
                                target_size=model_class.img_size,
                                n_grid=model_class.n_grid,
                                augment=True)
    test_dataset = YoloDataset(val_x,
                               val_y,
                               target_size=model_class.img_size,
                               n_grid=model_class.n_grid,
                               augment=False)

    class_weights = [1.0 for i in range(train_dataset.n_classes)]
    class_weights[0] = 0.2
    model = model_class(n_classes=train_dataset.n_classes,
                        n_base_units=6,
                        class_weights=class_weights)
    if os.path.exists(RESULT_DIR + '/model_last.npz'):
        print('continue from previous result')
        chainer.serializers.load_npz(RESULT_DIR + '/model_last.npz', model)
    optimizer = Adam()
    optimizer.setup(model)

    train_iter = SerialIterator(train_dataset, batch_size=BATCH_SIZE)
    test_iter = SerialIterator(test_dataset,
                               batch_size=BATCH_SIZE,
                               shuffle=False,
                               repeat=False)
    updater = StandardUpdater(train_iter, optimizer, device=DEVICE)
    trainer = Trainer(updater, (N_EPOCHS, 'epoch'), out=RESULT_DIR)

    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(extensions.Evaluator(test_iter, model, device=DEVICE))
    trainer.extend(
        extensions.PrintReport([
            'main/loss',
            'validation/main/loss',
            'main/cl_loss',
            'validation/main/cl_loss',
            'main/cl_acc',
            'validation/main/cl_acc',
            'main/pos_loss',
            'validation/main/pos_loss',
        ]))
    trainer.extend(extensions.snapshot_object(model, 'best_loss.npz'),
                   trigger=triggers.MinValueTrigger('validation/main/loss'))
    trainer.extend(extensions.snapshot_object(model,
                                              'best_classification.npz'),
                   trigger=triggers.MaxValueTrigger('validation/main/cl_acc'))
    trainer.extend(
        extensions.snapshot_object(model, 'best_position.npz'),
        trigger=triggers.MinValueTrigger('validation/main/pos_loss'))
    trainer.extend(extensions.snapshot_object(model, 'model_last.npz'),
                   trigger=(1, 'epoch'))

    trainer.run()
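# Hedged sketch (not part of the original script): reloading one of the
# snapshots written above for inference. It assumes the same model_class
# constructor arguments as in main(); n_classes must match the value used
# during training, and the forward/prediction API depends on the model
# definition, which is not shown here.
def load_best_model(snapshot_name, n_classes):
    model = model_class(n_classes=n_classes,
                        n_base_units=6,
                        class_weights=[1.0] * n_classes)
    chainer.serializers.load_npz(RESULT_DIR + '/' + snapshot_name, model)
    return model

# model = load_best_model('best_classification.npz', n_classes)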
Example 5
    def chainer_model_pipe(self, nn, train, valid, params):
        epoch = params['epoch']
        batch_size = params['batch_size']
        use_gpu = params['use_gpu']

        if 'fixed_base_w' in params.keys():
            fixed_base_w = params['fixed_base_w']
        else:
            fixed_base_w = False

        # Model Instance
        model = L.Classifier(nn)

        if use_gpu:
            device = 0
            model.to_gpu(device)
        else:
            device = -1

        # Create mini-batch iterators
        train_iter = SerialIterator(train, batch_size)
        valid_iter = SerialIterator(valid,
                                    batch_size,
                                    repeat=False,
                                    shuffle=False)

        # Set up learning
        optimizer = Adam()
        optimizer.setup(model)

        if fixed_base_w:
            model.predictor.base.disable_update()

        updater = StandardUpdater(train_iter, optimizer, device=device)

        trainer = Trainer(updater, (epoch, 'epoch'), out='result/cat_dog')
        trainer.extend(Evaluator(valid_iter, model, device=device))
        trainer.extend(LogReport(trigger=(1, 'epoch')))
        trainer.extend(PrintReport([
            'epoch', 'main/accuracy', 'validation/main/accuracy', 'main/loss',
            'validation/main/loss', 'elapsed_time'
        ]),
                       trigger=(1, 'epoch'))

        trainer.run()

        if use_gpu:
            model.to_cpu()

        return model
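# Hedged sketch of calling chainer_model_pipe(); `pipe` and `nn` are
# placeholders for whatever object defines this method and for the network
# (an object whose predictor exposes a `.base` sub-link when fixed_base_w is
# used). The dict keys mirror what the method reads from params.
params = {
    'epoch': 10,
    'batch_size': 32,
    'use_gpu': True,
    'fixed_base_w': True,   # optional: freeze the pretrained base during fine-tuning
}
# trained_model = pipe.chainer_model_pipe(nn, train_dataset, valid_dataset, params)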
Example 6
def main():
    # input_size: 299
    #model = InceptionV4(dim_out=17)
    #model = InceptionV4(dim_out=17, base_filter_num=6, ablocks=2, bblocks=1, cblocks=1)
    #model = InceptionResNetV2(dim_out=17)
    #model = InceptionResNetV2(dim_out=17, base_filter_num=8, ablocks=1, bblocks=2, cblocks=1)

    # input_size: 224
    #model = VGGNetBN(17)  # VGGNet original size
    #model = VGGNetBN(17, 16)  # VGGNet 1/4 of filter num
    #model = GoogLeNetBN(17)  # GoogLeNet original size
    #model = GoogLeNetBN(17, 16)  # GoogleNet 1/2 filter num
    #model = GoogLeNetBN(17, 8)  # GoogleNet 1/4 filter num
    #model = ResNet50(17)  # ResNet50 original size
    #model = ResNet50(17, 32)  # ResNet50 1/2 size
    #model = ResNet50(17, 16)  # ResNet50 1/4 size
    #model = SqueezeNet(17)  #SqueezeNet original size
    #model = SqueezeNet(17, 8)  #SqueezeNet 1/2 filter num
    #model = MobileNet(17)  # MobileNet original size
    #model = MobileNet(17, 16)  # MobileNet 1/2 filter num
    #model = MobileNet(17, 8)  # MobileNet 1/4 filter num

    # input_size: 100
    #model = FaceClassifier100x100V2(n_classes=17)
    model = FaceClassifier100x100V(n_classes=17)

    optimizer = Adam()
    optimizer.setup(model)

    train_dataset = load_dataset('train.tsv', True)
    test_dataset = load_dataset('test.tsv')

    train_iter = SerialIterator(train_dataset, batch_size=BATCH_SIZE)
    test_iter = SerialIterator(test_dataset, batch_size=BATCH_SIZE, shuffle=False, repeat=False)
    updater = StandardUpdater(train_iter, optimizer, device=DEVICE)
    trainer = Trainer(updater, (N_EPOCHS, 'epoch'), out='result')

    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(extensions.Evaluator(test_iter, model, device=DEVICE))
    trainer.extend(extensions.PrintReport(['main/loss', 'validation/main/loss', 'main/accuracy', 'validation/main/accuracy']))
    #trainer.extend(extensions.snapshot_object(model, 'snapshot_{.updater.epoch}.model'))

    trainer.run()

    chainer.serializers.save_npz('result/model.npz', model.to_cpu())
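# Hedged sketch (not part of the original script): reloading the weights saved
# above. FaceClassifier100x100V mirrors the class used in main(); how to call
# the model for prediction depends on its definition, which is not shown here.
def load_trained_model(path='result/model.npz'):
    model = FaceClassifier100x100V(n_classes=17)
    chainer.serializers.load_npz(path, model)
    return model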
Example 7
def train():
    # train_txt = "/media/common-ns/New Volume/reseach/Dataset/OU-ISIR_by_Setoguchi/CV01.txt"
    train_dir = "/media/common-ns/New Volume/reseach/Dataset/OU-ISIR_by_Setoguchi/Gallery/CV01(Gallery)/*"
    train = load_OULP(path_dir=train_dir)

    # print(train[0])

    # Training data
    # train = train[0:1000]
    train = [i[0] for i in train]  # keep only the data path from each (data, label) pair
    train = datasets.TupleDataset(train, train)  # pair each image with itself so the input is also the reconstruction target

    batch_size = 195
    train_iter = chainer.iterators.SerialIterator(train, batch_size=batch_size)

    #model = L.Classifier(Autoencoder(), lossfun=F.mean_squared_error)
    model = L.Classifier(CAE(), lossfun=sce_loss)
    model.compute_accuracy = False
    optimizer = optimizers.Adam()
    optimizer.setup(model)

    updater = StandardUpdater(train_iter, optimizer, device=0)
    trainer = Trainer(
        updater,
        (1000, 'epoch'),
        out="result",
    )
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(['epoch', 'main/loss']))
    trainer.extend(extensions.snapshot(), trigger=(200, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        target=model, filename='model_snapshot_{.updater.iteration}'),
                   trigger=(250, 'epoch'))
    trainer.extend(extensions.ProgressBar())

    trainer.run()
    serializers.save_npz(
        "/home/common-ns/setoguchi/chainer_files/Convolutional_Auto_Encoder/CAE_FC_model",
        model)
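# Self-contained sketch of the reconstruction setup used above -- pairing each
# input with itself via TupleDataset and training through L.Classifier with a
# pixel-wise loss -- on random data, so it runs without the OU-ISIR dataset.
import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import datasets, iterators, optimizers
from chainer.training import StandardUpdater, Trainer, extensions

class TinyAE(chainer.Chain):
    def __init__(self):
        super().__init__()
        with self.init_scope():
            self.enc = L.Linear(64, 16)
            self.dec = L.Linear(16, 64)

    def __call__(self, x):
        return self.dec(F.relu(self.enc(x)))

x = np.random.rand(256, 64).astype(np.float32)
train = datasets.TupleDataset(x, x)          # the input doubles as the target
train_iter = iterators.SerialIterator(train, 32)

model = L.Classifier(TinyAE(), lossfun=F.mean_squared_error)
model.compute_accuracy = False               # accuracy is meaningless here
optimizer = optimizers.Adam()
optimizer.setup(model)

updater = StandardUpdater(train_iter, optimizer, device=-1)
trainer = Trainer(updater, (3, 'epoch'), out='result_toy_ae')
trainer.extend(extensions.LogReport())
trainer.extend(extensions.PrintReport(['epoch', 'main/loss']))
trainer.run()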
Example 8
def run_linear_network(loss_fn, alpha=0.3, batch_size=2):

    # Get data
    np.random.seed(42)
    dataset = get_dataset()
    iterator = SerialIterator(dataset, batch_size, repeat=True, shuffle=True)

    # Set up network and loss
    predictor = L.Linear(None, 1)
    ranker = Ranker(predictor)
    loss = Loss(ranker, loss_fn)

    # Optimizer
    optimizer = Adam(alpha=alpha)
    optimizer.setup(loss)
    updater = StandardUpdater(iterator, optimizer, converter=zeropad_concat)
    trainer = Trainer(updater, (100, 'epoch'))
    log_report = extensions.LogReport(log_name=None)
    trainer.extend(log_report)
    np.random.seed(42)
    trainer.run()
    last_ndcg = log_report.log[-1]['ndcg']
    return last_ndcg
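# Hedged usage note: run_linear_network() expects a ranking loss function, and
# the 'ndcg' entry it reads from the log is reported by the surrounding
# project's Ranker/Loss classes (not shown here). `some_listwise_loss` below is
# only a placeholder name; any loss_fn(scores, targets) returning a Chainer
# scalar variable fits the same slot.
# ndcg = run_linear_network(loss_fn=some_listwise_loss, alpha=0.1, batch_size=4)
# print('final NDCG: {:.3f}'.format(ndcg))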
Example 9
def main():
    parser = argparse.ArgumentParser(description='training mnist')
    parser.add_argument('--gpu', '-g', default=-1, type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--epoch', '-e', type=int, default=100,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--batchsize', '-b', type=int, default=8,
                        help='Number of images in each mini-batch')
    parser.add_argument('--seed', '-s', type=int, default=0,
                        help='Random seed')
    parser.add_argument('--report_trigger', '-rt', type=str, default='1e',
                        help='Interval for reporting (Ex. 100i, default: 1e)')
    parser.add_argument('--save_trigger', '-st', type=str, default='1e',
                        help='Interval for saving the model (Ex. 100i, default: 1e)')
    parser.add_argument('--load_model', '-lm', type=str, default=None,
                        help='Path of the model object to load')
    parser.add_argument('--load_optimizer', '-lo', type=str, default=None,
                        help='Path of the optimizer object to load')
    args = parser.parse_args()

    start_time = datetime.now()
    save_dir = Path('output/{}'.format(start_time.strftime('%Y%m%d_%H%M')))

    random.seed(args.seed)
    np.random.seed(args.seed)
    cupy.random.seed(args.seed)

    backbone = 'mobilenet'
    model = ModifiedClassifier(DeepLab(n_class=13, task='semantic', backbone=backbone), lossfun=F.softmax_cross_entropy)
    if args.load_model is not None:
        serializers.load_npz(args.load_model, model)

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    optimizer = optimizers.Adam(alpha=1e-3)
    optimizer.setup(model)
    if args.load_optimizer is not None:
        serializers.load_npz(args.load_optimizer, optimizer)

    dir_path = './dataset/2D-3D-S/'
    augmentations = {'mirror': 0.5, 'flip': 0.5}
    train_data = Stanford2D3DS(dir_path, 'semantic', area='1 2 3 4', train=True)
    train_data.set_augmentations(crop=513, augmentations=augmentations)
    valid_data = Stanford2D3DS(dir_path, 'semantic', area='6', train=False, n_data=100)
    valid_data.set_augmentations(crop=513)

    train_iter = iterators.MultiprocessIterator(train_data, args.batchsize, n_processes=1)
    valid_iter = iterators.MultiprocessIterator(valid_data, args.batchsize, repeat=False, shuffle=False, n_processes=1)

    updater = StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = Trainer(updater, (args.epoch, 'epoch'), out=save_dir)

    label_list = list(valid_data.label_dict.keys())[1:]
    report_trigger = (int(args.report_trigger[:-1]), 'iteration' if args.report_trigger[-1] == 'i' else 'epoch')
    trainer.extend(extensions.LogReport(trigger=report_trigger))
    trainer.extend(ModifiedEvaluator(valid_iter, model, label_names=label_list,
                                     device=args.gpu), name='val', trigger=report_trigger)

    trainer.extend(extensions.PrintReport(['epoch', 'iteration', 'main/loss', 'main/acc', 'val/main/loss',
                                           'val/main/acc', 'val/main/mean_class_acc', 'val/main/miou',
                                           'elapsed_time']), trigger=report_trigger)

    trainer.extend(extensions.PlotReport(['main/loss', 'val/main/loss'], x_key=report_trigger[1],
                                         marker='.', file_name='loss.png', trigger=report_trigger))
    trainer.extend(extensions.PlotReport(['main/acc', 'val/main/acc'], x_key=report_trigger[1],
                                         marker='.', file_name='accuracy.png', trigger=report_trigger))
    class_accuracy_report = ['val/main/mean_class_acc']
    class_accuracy_report.extend(['val/main/class_acc/{}'.format(label) for label in label_list])
    class_iou_report = ['val/main/miou']
    class_iou_report.extend(['val/main/iou/{}'.format(label) for label in label_list])
    trainer.extend(extensions.PlotReport(class_accuracy_report, x_key=report_trigger[1],
                                         marker='.', file_name='class_accuracy.png', trigger=report_trigger))
    trainer.extend(extensions.PlotReport(class_iou_report, x_key=report_trigger[1],
                                         marker='.', file_name='class_iou.png', trigger=report_trigger))

    save_trigger = (int(args.save_trigger[:-1]), 'iteration' if args.save_trigger[-1] == 'i' else 'epoch')
    trainer.extend(extensions.snapshot_object(model, filename='model_{0}-{{.updater.{0}}}.npz'
                                              .format(save_trigger[1])), trigger=save_trigger)
    trainer.extend(extensions.snapshot_object(optimizer, filename='optimizer_{0}-{{.updater.{0}}}.npz'
                                              .format(save_trigger[1])), trigger=save_trigger)

    if save_dir.exists():
        shutil.rmtree(save_dir)
    save_dir.mkdir()
    (save_dir / 'training_details').mkdir()

    # Write training parameters to a text file
    with open(save_dir / 'training_details/train_params.txt', 'w') as f:
        f.write('model: {}(backbone: {})\n'.format(model.predictor.__class__.__name__, backbone))
        f.write('n_epoch: {}\n'.format(args.epoch))
        f.write('batch_size: {}\n'.format(args.batchsize))
        f.write('n_data_train: {}\n'.format(len(train_data)))
        f.write('n_data_val: {}\n'.format(len(valid_data)))
        f.write('seed: {}\n'.format(args.seed))
        if len(augmentations) > 0:
            f.write('[augmentation]\n')
            for process in augmentations:
                f.write('  {}: {}\n'.format(process, augmentations[process]))

    trainer.run()
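# The report/save trigger strings above ('1e', '100i', ...) are parsed inline.
# A small helper expressing the same convention, shown here only as a sketch:
def parse_trigger(spec):
    """'100i' -> (100, 'iteration'); '1e' -> (1, 'epoch')."""
    unit = 'iteration' if spec[-1] == 'i' else 'epoch'
    return int(spec[:-1]), unit

# report_trigger = parse_trigger(args.report_trigger)
# save_trigger = parse_trigger(args.save_trigger)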
Example 10
def main():
    args = create_args('train')
    result_dir = create_result_dir(args.model_name)

    # Prepare devices
    devices = get_gpu_dict(args.gpus)

    # Instantiate a model
    model = RegNet(epsilon=args.epsilon)

    # Instantiate an optimizer
    optimizer = get_optimizer(model, **vars(args))

    # Setting up datasets
    prep = TransformDataset(KITTI(args.kitti_path, 'train'),
                            CalibPrepare(args.init_pose))
    train, valid = split_dataset(
        prep, round(len(prep) * (1 - args.valid_proportion)))
    print("========== Model Parameters ==========")
    print("location loss weight (epsilon):", args.epsilon)
    print('train samples: {}, valid samples: {}'.format(
        len(train), len(valid)))

    # Iterator
    if DEBUG:
        Iterator = SerialIterator
    else:
        Iterator = MultiprocessIterator
    train_iter = Iterator(train, args.batchsize)
    valid_iter = Iterator(valid,
                          args.valid_batchsize,
                          repeat=False,
                          shuffle=False)

    # Updater
    if DEBUG:
        Updater = StandardUpdater(train_iter,
                                  optimizer,
                                  device=devices['main'])
    else:
        Updater = ParallelUpdater(train_iter, optimizer, devices=devices)
    trainer = Trainer(Updater, (args.epoch, 'epoch'), out=result_dir)

    # Extensions
    trainer.extend(extensions.Evaluator(valid_iter,
                                        model,
                                        device=devices['main']),
                   trigger=(args.valid_freq, 'epoch'))
    trainer.extend(extensions.snapshot(),
                   trigger=(args.snapshot_iter, 'iteration'))
    trainer.extend(extensions.LogReport(),
                   trigger=(args.show_log_iter, 'iteration'))
    trainer.extend(extensions.ProgressBar(update_interval=20))
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'iteration', 'main/loss', 'validation/main/loss',
            'elapsed_time'
        ]))

    # Resume from snapshot
    if args.resume_from:
        chainer.serializers.load_npz(args.resume_from, trainer)

    # Train and save
    print("========== Training ==========")
    hook = CupyMemoryProfileHook()
    with hook:
        trainer.run()

    print("========== Saving ==========")
    chainer.serializers.save_hdf5(create_result_file(args.model_name), model)
    print("Done.")
    print("========== Memory Profiling ==========")
    hook.print_report()
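# get_gpu_dict() is not shown above. ParallelUpdater expects a dict that maps
# device names to GPU ids and must contain a 'main' entry; a hypothetical
# implementation matching that contract could look like this:
def get_gpu_dict(gpus):
    """e.g. '0,1,2' -> {'main': 0, 'gpu1': 1, 'gpu2': 2}"""
    ids = [int(g) for g in str(gpus).split(',')]
    devices = {'main': ids[0]}
    devices.update({'gpu{}'.format(i): gid for i, gid in enumerate(ids[1:], 1)})
    return devices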
Example 11
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--step_size',
                        '-ss',
                        type=int,
                        default=3000,
                        help='step_size for lr exponential')
    parser.add_argument('--gradclip',
                        '-c',
                        type=float,
                        default=5,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--pretrain',
                        '-pr',
                        default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--snapshot',
                        '-snap',
                        type=int,
                        default=100,
                        help='snapshot iteration for save checkpoint')
    parser.add_argument('--test_mode',
                        action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.add_argument('--valid',
                        '-val',
                        default='',
                        help='Validation directory path containing the validation txt file')
    parser.add_argument('--test',
                        '-tt',
                        default='graph_test',
                        help='Test directory path containing the test txt file')
    parser.add_argument('--train',
                        '-tr',
                        default="D:/toy/",
                        help='Train directory path containing the train txt file')
    parser.add_argument('--train_edge',
                        default="all",
                        help="train temporal/all to comparision")
    parser.add_argument('--database', default="BP4D", help="BP4D/DISFA")
    parser.add_argument(
        '--use_pure_python',
        action='store_true',
        help=
        'you can use pure python code to check whether your optimized code works correctly'
    )
    parser.add_argument('--lr', '-l', type=float, default=0.1)
    parser.add_argument("--profile",
                        "-p",
                        action="store_true",
                        help="whether to profile to examine speed bottleneck")
    parser.add_argument("--num_attrib",
                        type=int,
                        default=2048,
                        help="node feature dimension")
    parser.add_argument("--need_cache_graph",
                        "-ng",
                        action="store_true",
                        help="whether to cache factor graph to LRU cache")
    parser.add_argument("--eval_mode",
                        '-eval',
                        action="store_true",
                        help="whether to evaluation or not")
    parser.add_argument("--proc_num", "-pn", type=int, default=1)
    parser.add_argument("--resume",
                        action="store_true",
                        help="resume from pretrained model")
    parser.set_defaults(test=False)
    args = parser.parse_args()
    config.OPEN_CRF_CONFIG["use_pure_python"] = args.use_pure_python
    # Because we modify config.OPEN_CRF_CONFIG, the change influences the open_crf layer imported below.
    from graph_learning.dataset.crf_pact_structure import CRFPackageStructure
    from graph_learning.dataset.graph_dataset import GraphDataset
    from graph_learning.extensions.opencrf_evaluator import OpenCRFEvaluator
    from graph_learning.dataset.graph_dataset_reader import GlobalDataSet
    from graph_learning.updater.bptt_updater import convert
    from graph_learning.extensions.AU_roi_label_split_evaluator import ActionUnitEvaluator
    if args.use_pure_python:

        from graph_learning.model.open_crf.pure_python.open_crf_layer import OpenCRFLayer
    else:
        from graph_learning.model.open_crf.cython.open_crf_layer import OpenCRFLayer

    print_interval = (1, 'iteration')
    val_interval = (5, 'iteration')
    adaptive_AU_database(args.database)
    root_dir = os.path.dirname(os.path.dirname(args.train))
    dataset = GlobalDataSet(num_attrib=args.num_attrib,
                            train_edge=args.train_edge)
    file_name = list(
        filter(lambda e: e.endswith(".txt"), os.listdir(args.train)))[0]
    sample = dataset.load_data(args.train + os.sep + file_name)
    print("pre load done")

    crf_pact_structure = CRFPackageStructure(
        sample, dataset, num_attrib=dataset.num_attrib_type, need_s_rnn=False)
    model = OpenCRFLayer(node_in_size=dataset.num_attrib_type,
                         weight_len=crf_pact_structure.num_feature)

    train_str = args.train
    if train_str[-1] == "/":
        train_str = train_str[:-1]
    trainer_keyword = os.path.basename(train_str)
    trainer_keyword_tuple = tuple(trainer_keyword.split("_"))
    LABEL_SPLIT = config.BP4D_LABEL_SPLIT if args.database == "BP4D" else config.DISFA_LABEL_SPLIT
    if trainer_keyword_tuple not in LABEL_SPLIT:
        return
    # assert "_" in trainer_keyword

    train_data = GraphDataset(args.train,
                              attrib_size=dataset.num_attrib_type,
                              global_dataset=dataset,
                              need_s_rnn=False,
                              need_cache_factor_graph=args.need_cache_graph,
                              get_geometry_feature=False)
    if args.proc_num == 1:
        train_iter = chainer.iterators.SerialIterator(train_data,
                                                      1,
                                                      shuffle=True)
    elif args.proc_num > 1:
        train_iter = chainer.iterators.MultiprocessIterator(
            train_data,
            batch_size=1,
            n_processes=args.proc_num,
            repeat=True,
            shuffle=True,
            n_prefetch=10,
            shared_mem=31457280)
    optimizer = chainer.optimizers.SGD(lr=args.lr)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))
    updater = StandardUpdater(train_iter, optimizer, converter=convert)
    trainer = chainer.training.Trainer(updater, (args.epoch, 'epoch'),
                                       out=args.out)

    interval = 1
    if args.test_mode:
        chainer.config.train = False

    trainer.extend(
        PrintReport([
            'iteration',
            'epoch',
            'elapsed_time',
            'lr',
            'main/loss',
            "opencrf_val/main/hit",  #"opencrf_validation/main/U_hit",
            "opencrf_val/main/miss",  #"opencrf_validation/main/U_miss",
            "opencrf_val/main/F1",  #"opencrf_validation/main/U_F1"
            'opencrf_val/main/accuracy',
        ]),
        trigger=print_interval)
    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=print_interval)
    trainer.extend(
        chainer.training.extensions.LogReport(
            trigger=print_interval,
            log_name="open_crf_{}.log".format(trainer_keyword)))

    optimizer_snapshot_name = "{0}_{1}_opencrf_optimizer.npz".format(
        trainer_keyword, args.database)
    model_snapshot_name = "{0}_{1}_opencrf_model.npz".format(
        trainer_keyword, args.database)
    trainer.extend(chainer.training.extensions.snapshot_object(
        optimizer, filename=optimizer_snapshot_name),
                   trigger=(args.snapshot, 'iteration'))

    trainer.extend(chainer.training.extensions.snapshot_object(
        model, filename=model_snapshot_name),
                   trigger=(args.snapshot, 'iteration'))

    if args.resume and os.path.exists(args.out + os.sep + model_snapshot_name):
        print("loading model_snapshot_name to model")
        chainer.serializers.load_npz(args.out + os.sep + model_snapshot_name,
                                     model)
    if args.resume and os.path.exists(args.out + os.sep +
                                      optimizer_snapshot_name):
        print("loading optimizer_snapshot_name to optimizer")
        chainer.serializers.load_npz(
            args.out + os.sep + optimizer_snapshot_name, optimizer)

    # trainer.extend(chainer.training.extensions.ProgressBar(update_interval=1))
    # trainer.extend(chainer.training.extensions.snapshot(),
    #                trigger=(args.snapshot, 'epoch'))

    # trainer.extend(chainer.training.extensions.ExponentialShift('lr', 0.9), trigger=(1, 'epoch'))

    if chainer.training.extensions.PlotReport.available():
        trainer.extend(chainer.training.extensions.PlotReport(
            ['main/loss'],
            file_name="{}_train_loss.png".format(trainer_keyword)),
                       trigger=(100, "iteration"))
        trainer.extend(chainer.training.extensions.PlotReport(
            ['opencrf_val/F1', 'opencrf_val/accuracy'],
            file_name="{}_val_f1.png".format(trainer_keyword)),
                       trigger=val_interval)

    if args.valid:
        valid_data = GraphDataset(
            args.valid,
            attrib_size=dataset.num_attrib_type,
            global_dataset=dataset,
            need_s_rnn=False,
            need_cache_factor_graph=args.need_cache_graph)
        validate_iter = chainer.iterators.SerialIterator(valid_data,
                                                         1,
                                                         repeat=False,
                                                         shuffle=False)
        evaluator = OpenCRFEvaluator(iterator=validate_iter,
                                     target=model,
                                     device=-1)
        trainer.extend(evaluator, trigger=val_interval)

    if args.profile:
        cProfile.runctx("trainer.run()", globals(), locals(), "Profile.prof")
        s = pstats.Stats("Profile.prof")
        s.strip_dirs().sort_stats("time").print_stats()
    else:
        trainer.run()
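# Minimal standalone sketch of the optimizer-hook setup used above (SGD with
# gradient clipping and weight decay) on a toy link; the values mirror the
# script's defaults and the link is only a placeholder.
import chainer
import chainer.links as L

toy = L.Linear(10, 2)
opt = chainer.optimizers.SGD(lr=0.1)
opt.setup(toy)
opt.add_hook(chainer.optimizer.GradientClipping(5.0))     # clip the global gradient norm
opt.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))  # L2 regularization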
Example 12
def main():
    parser = argparse.ArgumentParser(description='training mnist')
    parser.add_argument('--gpu',
                        '-g',
                        default=-1,
                        type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--seed',
                        '-s',
                        type=int,
                        default=0,
                        help='Random seed')
    parser.add_argument('--report_trigger',
                        '-rt',
                        type=str,
                        default='1e',
                        help='Interval for reporting (Ex.100i/1e)')
    parser.add_argument('--save_trigger',
                        '-st',
                        type=str,
                        default='1e',
                        help='Interval for saving the model (Ex.100i/1e)')
    parser.add_argument('--load_model',
                        '-lm',
                        type=str,
                        default=None,
                        help='Path of the model object to load')
    parser.add_argument('--load_optimizer',
                        '-lo',
                        type=str,
                        default=None,
                        help='Path of the optimizer object to load')
    args = parser.parse_args()

    if not Path('output').exists():
        Path('output').mkdir()
    start_time = datetime.now()
    save_dir = Path('output/{}'.format(start_time.strftime('%Y%m%d_%H%M')))

    random.seed(args.seed)
    np.random.seed(args.seed)
    cupy.random.seed(args.seed)
    chainer.config.cudnn_deterministic = True

    model = L.Classifier(SEResNet50(n_class=101))
    # model = L.Classifier(SERes2Net50(n_class=101))
    # model = L.Classifier(GCResNet50(n_class=101))
    # model = L.Classifier(AAResNet50(n_class=101))

    if args.load_model is not None:
        serializers.load_npz(args.load_model, model)

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    optimizer = optimizers.Adam(alpha=1e-3,
                                weight_decay_rate=1e-4,
                                amsgrad=True)
    optimizer.setup(model)
    if args.load_optimizer is not None:
        serializers.load_npz(args.load_optimizer, optimizer)

    augmentation = {
        'HorizontalFlip': {
            'p': 0.5
        },
        'PadIfNeeded': {
            'p': 1.0,
            'min_height': 512,
            'min_width': 512
        },
        'Rotate': {
            'p': 1.0,
            'limit': 15,
            'interpolation': 1
        },
        'Resize': {
            'p': 1.0,
            'height': 248,
            'width': 248,
            'interpolation': 2
        },
        'RandomScale': {
            'p': 1.0,
            'scale_limit': 0.09,
            'interpolation': 2
        },
        'RandomCrop': {
            'p': 1.0,
            'height': 224,
            'width': 224
        },
    }
    resize = {
        'PadIfNeeded': {
            'p': 1.0,
            'min_height': 512,
            'min_width': 512
        },
        'Resize': {
            'p': 1.0,
            'height': 224,
            'width': 224,
            'interpolation': 2
        }
    }

    sl = slice(0, None, 5)
    train_data = Food101Dataset(augmentation=augmentation, drop_index=sl)
    valid_data = Food101Dataset(augmentation=resize, index=sl)

    train_iter = iterators.SerialIterator(train_data, args.batchsize)
    valid_iter = iterators.SerialIterator(valid_data,
                                          args.batchsize,
                                          repeat=False,
                                          shuffle=False)

    updater = StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = Trainer(updater, (args.epoch, 'epoch'), out=save_dir)

    report_trigger = (int(args.report_trigger[:-1]), 'iteration'
                      if args.report_trigger[-1] == 'i' else 'epoch')
    trainer.extend(extensions.LogReport(trigger=report_trigger))
    trainer.extend(extensions.Evaluator(valid_iter, model, device=args.gpu),
                   name='val',
                   trigger=report_trigger)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'main/accuracy', 'val/main/loss',
        'val/main/accuracy', 'elapsed_time'
    ]),
                   trigger=report_trigger)
    trainer.extend(
        extensions.PlotReport(['main/loss', 'val/main/loss'],
                              x_key=report_trigger[1],
                              marker='.',
                              file_name='loss.png',
                              trigger=report_trigger))
    trainer.extend(
        extensions.PlotReport(['main/accuracy', 'val/main/accuracy'],
                              x_key=report_trigger[1],
                              marker='.',
                              file_name='accuracy.png',
                              trigger=report_trigger))

    save_trigger = (int(args.save_trigger[:-1]),
                    'iteration' if args.save_trigger[-1] == 'i' else 'epoch')
    trainer.extend(extensions.snapshot_object(
        model,
        filename='model_{0}-{{.updater.{0}}}.npz'.format(save_trigger[1])),
                   trigger=save_trigger)
    trainer.extend(extensions.snapshot_object(
        optimizer,
        filename='optimizer_{0}-{{.updater.{0}}}.npz'.format(save_trigger[1])),
                   trigger=save_trigger)
    trainer.extend(extensions.ProgressBar())

    if save_dir.exists():
        shutil.rmtree(save_dir)
    save_dir.mkdir()

    # Write training parameters to a text file
    with open(save_dir / 'train_params.txt', 'w') as f:
        f.write('model: {}\n'.format(model.predictor.__class__.__name__))
        f.write('n_epoch: {}\n'.format(args.epoch))
        f.write('batch_size: {}\n'.format(args.batchsize))
        f.write('seed: {}\n'.format(args.seed))
        f.write('n_data_train: {}\n'.format(len(train_data)))
        f.write('n_data_val: {}\n'.format(len(valid_data)))
        f.write('augmentation: \n')
        for k, v in augmentation.items():
            f.write('  {}: {}\n'.format(k, v))

    trainer.run()
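# Side note on the snapshot filename templates above: the doubled braces leave
# a trainer-formatted placeholder behind after str.format(), e.g.
template = 'model_{0}-{{.updater.{0}}}.npz'
print(template.format('epoch'))  # -> 'model_epoch-{.updater.epoch}.npz'
# snapshot_object() then formats that string with the trainer, so the files
# come out as model_epoch-1.npz, model_epoch-2.npz, and so on.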
Example 13
def main(arg_list=None):
    parser = argparse.ArgumentParser(description='Chainer LSTM')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        nargs='+',
                        default=[20],
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--optimizer',
                        '-o',
                        nargs='+',
                        default=['momentumsgd'],
                        help='Optimizer (sgd, momentumsgd, adam)')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        nargs='+',
                        default=[128],
                        help='Number of training points in each mini-batch')
    parser.add_argument('--lr',
                        type=float,
                        nargs='+',
                        default=[1e-2, 1e-3, 1e-4, 1e-5],
                        help='Learning rate')
    parser.add_argument(
        '--network',
        '-n',
        default='ff',
        help=
        'Neural network type, either "ff", "tdnn", "lstm", "zoneoutlstm", "peepholelstm" or "gru". Setting any recurrent network implies "--shuffle-sequences"'
    )
    parser.add_argument('--frequency',
                        '-f',
                        type=int,
                        default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--units',
                        '-u',
                        type=int,
                        nargs='+',
                        default=[1024],
                        help='Number of units')
    parser.add_argument('--layers',
                        '-l',
                        type=int,
                        default=2,
                        help='Number of hidden layers')
    parser.add_argument('--activation',
                        '-a',
                        default='relu',
                        help='FF activation function (sigmoid, tanh or relu)')
    parser.add_argument('--tdnn-ksize',
                        type=int,
                        nargs='+',
                        default=[5],
                        help='TDNN kernel size')
    parser.add_argument('--bproplen',
                        type=int,
                        default=20,
                        help='Backpropagation length')
    parser.add_argument('--timedelay',
                        type=int,
                        default=0,
                        help='Delay target values by this many time steps')
    parser.add_argument('--noplot',
                        dest='plot',
                        action='store_false',
                        help='Disable PlotReport extension')
    parser.add_argument('--splice', type=int, default=0, help='Splicing size')
    parser.add_argument(
        '--dropout',
        '-d',
        type=float,
        nargs='+',
        default=[0],
        help=
        'Dropout rate (0 to disable). In case of Zoneout LSTM, this parameter has 2 arguments: c_ratio h_ratio'
    )
    parser.add_argument('--ft',
                        default='final.feature_transform',
                        help='Kaldi feature transform file')
    parser.add_argument('--tri', action='store_true', help='Use triphones')
    parser.add_argument(
        '--shuffle-sequences',
        action='store_true',
        help=
        'True if sequences should be shuffled as a whole, otherwise all frames will be shuffled independent of each other'
    )
    parser.add_argument(
        '--data-dir',
        default='data/fmllr',
        help=
        'Data directory, this will be prepended to data files and feature transform'
    )
    parser.add_argument(
        '--offset-dir',
        default='data',
        help='Data directory, this will be prepended to offset files')
    parser.add_argument(
        '--target-dir',
        default='data/targets',
        help='Data directory, this will be prepended to target files')
    parser.add_argument(
        '--ivector-dir',
        help='Data directory, this will be prepended to ivector files')
    parser.add_argument('--data', default='data_{}.npy', help='Training data')
    parser.add_argument('--offsets',
                        default='offsets_{}.npy',
                        help='Training offsets')
    parser.add_argument('--targets',
                        default='targets_{}.npy',
                        help='Training targets')
    parser.add_argument('--ivectors',
                        default='ivectors_{}.npy',
                        help='Training ivectors')
    parser.add_argument('--no-validation',
                        dest='use_validation',
                        action='store_false',
                        help='Do not evaluate validation data while training')
    parser.add_argument('--train-fold',
                        type=int,
                        help='Train fold network with this ID')
    parser.add_argument('--train-rpl',
                        action='store_true',
                        help='Train RPL layer')
    parser.add_argument('--rpl-model',
                        default="result_rpl/model",
                        help='RPL layer model')
    parser.add_argument('--fold-data-dir',
                        default="fold_data",
                        help='Directory with fold input data')
    parser.add_argument('--fold-output-dir',
                        default="fold_data_out",
                        help='Directory with predicted fold output')
    parser.add_argument('--fold-model-dir',
                        default="fold_models",
                        help='Directory with output fold model')
    parser.add_argument(
        '--fold-data-pattern',
        default='data_{0}.npy',
        help=
        'Filename pattern of each fold data, {0} will be replaced by fold ID')
    parser.add_argument('--fold-offset-pattern',
                        default='offsets_{0}.npy',
                        help='Filename pattern of each fold offset')
    parser.add_argument('--fold-target-pattern',
                        default='targets_{0}.npy',
                        help='Filename pattern of each fold targets')
    parser.add_argument(
        '--fold-ivector-pattern',
        default='ivectors_{}.npy',
        help=
        'Filename pattern of each fold i-vectors file, {} will be replaced by fold ID'
    )
    parser.add_argument('--fold-output-pattern',
                        default='data_{0}.npy',
                        help='Filename pattern of each fold network output')
    parser.add_argument('--fold-network-pattern',
                        default='fold_{0}.npz',
                        help='Filename pattern of each fold network')
    parser.add_argument('--no-progress',
                        action='store_true',
                        help='Disable progress bar')
    if arg_list is not None:
        args = parser.parse_args(list(map(str, arg_list)))
    else:
        args = parser.parse_args()

    # set options implied by other options
    if is_nn_recurrent(args.network):
        args.shuffle_sequences = True

    # create output directories
    Path(args.out).mkdir(exist_ok=True, parents=True)
    if args.train_fold is not None:
        file_out = Path(args.fold_model_dir,
                        args.fold_network_pattern.format(args.train_fold))
        Path(file_out.parent).mkdir(exist_ok=True, parents=True)

    # print arguments to the file
    with open(args.out + "/args.txt", "w") as f:
        for attr in dir(args):
            if not attr.startswith('_'):
                f.write('# {}: {}\n'.format(attr, getattr(args, attr)))
        f.write(' '.join(
            map(lambda x: "'" + x + "'" if ' ' in x else x, sys.argv)) + '\n')

    # print arguments to stdout
    for attr in dir(args):
        if not attr.startswith('_'):
            print('# {}: {}'.format(attr, getattr(args, attr)))
    print('')

    # number of output classes (triphone vs. monophone targets)
    num_classes = 1909 if args.tri else 39

    # create model
    if args.train_rpl:
        model = RPL4(num_classes)
        model_cls = L.Classifier(model)
    else:
        if args.activation == "sigmoid":
            activation = F.sigmoid
        elif args.activation == "tanh":
            activation = F.tanh
        elif args.activation == "relu":
            activation = F.relu
        else:
            print("Wrong activation function specified")
            return
        model = get_nn(args.network, args.layers, args.units, num_classes,
                       activation, args.tdnn_ksize, args.dropout)

        # classifier reports softmax cross entropy loss and accuracy at every
        # iteration, which will be used by the PrintReport extension below.
        model_cls = L.Classifier(model)
    if args.gpu >= 0:
        # make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model_cls.to_gpu()  # copy the model to the GPU

    offsets = offsets_dev = None

    if args.train_rpl:
        # load training data
        fold = 0
        x = []
        y = []

        while True:
            x_file = Path(args.fold_output_dir,
                          args.fold_output_pattern.format(fold))
            y_file = Path(args.fold_data_dir,
                          args.fold_target_pattern.format(fold))
            if not x_file.is_file() or not y_file.is_file():
                break
            print("Loading fold {} data".format(fold))
            x_ = np.load(str(x_file))
            y_ = np.load(str(y_file))
            x.append(x_)
            y.append(y_)
            fold += 1

        if fold == 0:
            print("Error: No fold data found")
            return

        x = np.concatenate(x, axis=0)
        y = np.concatenate(y, axis=0)

        if args.use_validation:  #TODO: use args.data instead of args.dev_data
            x_dev = np.load(str(Path(args.data_dir, args.data.format("dev"))))
            # offsets_dev = loadBin(str(Path(args.datadir, args.dev_offsets)), np.int32)
            y_dev = np.load(
                str(Path(args.target_dir, args.targets.format("dev"))))
    else:
        # load training data
        ivectors = None
        ivectors_dev = None
        if args.train_fold is not None:
            x = []
            offsets = [0]
            y = []
            ivectors = []
            num = 0
            fold = 0
            while True:
                if fold != args.train_fold:
                    x_file = Path(args.fold_data_dir,
                                  args.fold_data_pattern.format(fold))
                    if not x_file.is_file():
                        break
                    offsets_file = Path(args.fold_data_dir,
                                        args.fold_offset_pattern.format(fold))
                    y_file = Path(args.fold_data_dir,
                                  args.fold_target_pattern.format(fold))
                    if args.ivector_dir is not None:
                        ivectors_file = Path(
                            args.fold_data_dir,
                            args.fold_ivector_pattern.format(fold))
                        if not ivectors_file.is_file():
                            print("Error: missing ivectors for fold data {}".
                                  format(fold))
                            return

                    print("Loading fold {} data".format(fold))
                    x_fold = np.load(str(x_file))
                    x.append(x_fold)
                    if is_nn_recurrent(args.network):
                        offsets_fold = np.load(str(offsets_file))
                        offsets.extend(offsets_fold[1:] + num)
                    y_fold = np.load(str(y_file))
                    y.append(y_fold)
                    if args.ivector_dir is not None:
                        ivectors_fold = np.load(str(ivectors_file))
                        ivectors.append(ivectors_fold)
                    num += x_fold.shape[0]
                fold += 1

            if len(x) == 0:
                print("Error: No fold data found")
                return

            x = np.concatenate(x, axis=0)
            if is_nn_recurrent(args.network):
                offsets = np.array(offsets, dtype=np.int32)
            y = np.concatenate(y, axis=0)
            if args.ivector_dir is not None:
                ivectors = np.concatenate(ivectors, axis=0)
        else:
            x = np.load(str(Path(args.data_dir, args.data.format("train"))))
            if is_nn_recurrent(args.network):
                offsets = np.load(
                    str(Path(args.offset_dir, args.offsets.format("train"))))
            y = np.load(
                str(Path(args.target_dir, args.targets.format("train"))))
            if args.ivector_dir is not None:
                ivectors = np.load(
                    str(Path(args.ivector_dir, args.ivectors.format("train"))))

        if args.use_validation:
            x_dev = np.load(str(Path(args.data_dir, args.data.format("dev"))))
            if is_nn_recurrent(args.network):
                offsets_dev = np.load(
                    str(Path(args.offset_dir, args.offsets.format("dev"))))
            y_dev = np.load(
                str(Path(args.target_dir, args.targets.format("dev"))))
            if args.ivector_dir is not None:
                ivectors_dev = np.load(
                    str(Path(args.ivector_dir, args.ivectors.format("dev"))))

        # apply splicing
        if args.network == "tdnn":
            splice = (sum(args.tdnn_ksize) - len(args.tdnn_ksize)) // 2
        else:
            splice = args.splice
        if splice > 0:
            x = splicing(x, range(-splice, splice + 1))
            if args.use_validation:
                x_dev = splicing(x_dev, range(-splice, splice + 1))

        # load feature transform
        if args.ft and args.ft != '-':  # load only when a transform file is given
            ft = loadKaldiFeatureTransform(str(Path(args.data_dir, args.ft)))
            if is_nn_recurrent(
                    args.network
            ):  # select transform middle frame if the network is recurrent
                dim = ft["shape"][1]
                zi = ft["shifts"].index(0)
                ft["rescale"] = ft["rescale"][zi * dim:(zi + 1) * dim]
                ft["addShift"] = ft["addShift"][zi * dim:(zi + 1) * dim]
                ft["shape"][0] = dim
                ft["shifts"] = [0]
            elif args.network == "tdnn":
                dim = ft["shape"][1]
                zi = ft["shifts"].index(0)
                winlen = 2 * splice + 1
                ft["rescale"] = np.tile(ft["rescale"][zi * dim:(zi + 1) * dim],
                                        winlen)
                ft["addShift"] = np.tile(
                    ft["addShift"][zi * dim:(zi + 1) * dim], winlen)
                ft["shape"][0] = dim * winlen
                ft["shifts"] = list(range(-splice, splice + 1))
            # apply feature transform
            x = applyKaldiFeatureTransform(x, ft)
            if args.use_validation:
                x_dev = applyKaldiFeatureTransform(x_dev, ft)

        if ivectors is not None:
            x = np.concatenate((x, ivectors), axis=1)
        if ivectors_dev is not None:
            x_dev = np.concatenate((x_dev, ivectors_dev), axis=1)

        # shift the input dataset according to time delay
        if is_nn_recurrent(args.network) and args.timedelay != 0:
            x, y, offsets = apply_time_delay(x, y, offsets, args.timedelay)
            if args.use_validation:
                x_dev, y_dev, offsets_dev = apply_time_delay(
                    x_dev, y_dev, offsets_dev, args.timedelay)

    # create chainer datasets
    train_dataset = chainer.datasets.TupleDataset(x, y)
    if args.use_validation:
        dev_dataset = chainer.datasets.TupleDataset(x_dev, y_dev)

    # prepare train stages
    train_stages_len = max(len(args.batchsize), len(args.lr))
    train_stages = [{
        'epoch': index_padded(args.epoch, i),
        'opt': index_padded(args.optimizer, i),
        'bs': index_padded(args.batchsize, i),
        'lr': index_padded(args.lr, i)
    } for i in range(train_stages_len)]
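    # index_padded() is defined outside this snippet; the construction above
    # only needs "take the i-th element, or the last one if the list is
    # shorter", i.e. something like this hypothetical implementation:
    #     def index_padded(values, i):
    #         return values[i] if i < len(values) else values[-1]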

    for i, ts in enumerate(train_stages):
        if ts['opt'] == 'adam':  # learning rate not used, don't print it
            print(
                "=== Training stage {}: epoch = {}, batchsize = {}, optimizer = {}"
                .format(i, ts['epoch'], ts['bs'], ts['opt']))
        else:
            print(
                "=== Training stage {}: epoch = {}, batchsize = {}, optimizer = {}, learning rate = {}"
                .format(i, ts['epoch'], ts['bs'], ts['opt'], ts['lr']))

        # reset state to allow training with different batch size in each stage
        if not args.train_rpl and is_nn_recurrent(args.network):
            model.reset_state()

        # setup an optimizer
        if ts['opt'] == "sgd":
            optimizer = chainer.optimizers.SGD(lr=ts['lr'])
        elif ts['opt'] == "momentumsgd":
            optimizer = chainer.optimizers.MomentumSGD(lr=ts['lr'])
        elif ts['opt'] == "adam":
            optimizer = chainer.optimizers.Adam()
        else:
            print("Wrong optimizer specified: {}".format(ts['opt']))
            exit(1)
        optimizer.setup(model_cls)

        if args.shuffle_sequences:
            train_iter = SequenceShuffleIterator(train_dataset, offsets,
                                                 ts['bs'])
            if args.use_validation:
                dev_iter = SequenceShuffleIterator(dev_dataset,
                                                   None,
                                                   ts['bs'],
                                                   repeat=False,
                                                   shuffle=False)
        else:
            train_iter = SerialIterator(train_dataset, ts['bs'])
            if args.use_validation:
                dev_iter = SerialIterator(dev_dataset,
                                          ts['bs'],
                                          repeat=False,
                                          shuffle=False)

        # set up a trainer
        if is_nn_recurrent(args.network):
            updater = BPTTUpdater(train_iter,
                                  optimizer,
                                  args.bproplen,
                                  device=args.gpu)
        else:
            updater = StandardUpdater(train_iter, optimizer, device=args.gpu)
        if args.use_validation:
            stop_trigger = EarlyStoppingTrigger(ts['epoch'],
                                                key='validation/main/loss',
                                                eps=-0.001)
        else:
            stop_trigger = (ts['epoch'], 'epoch')
        trainer = training.Trainer(updater,
                                   stop_trigger,
                                   out="{}/{}".format(args.out, i))

        trainer.extend(model_saver)

        # evaluate the model with the development dataset for each epoch
        if args.use_validation:
            trainer.extend(
                extensions.Evaluator(dev_iter, model_cls, device=args.gpu))

        # dump a computational graph from 'loss' variable at the first iteration
        # the "main" refers to the target link of the "main" optimizer.
        trainer.extend(extensions.dump_graph('main/loss'))

        # take a snapshot for each specified epoch
        frequency = ts['epoch'] if args.frequency == -1 else max(
            1, args.frequency)
        trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))

        # write a log of evaluation statistics for each epoch
        trainer.extend(extensions.LogReport())

        # save two plot images to the result dir
        if args.plot and extensions.PlotReport.available():
            plot_vars_loss = ['main/loss']
            plot_vars_acc = ['main/accuracy']
            if args.use_validation:
                plot_vars_loss.append('validation/main/loss')
                plot_vars_acc.append('validation/main/accuracy')
            trainer.extend(
                extensions.PlotReport(plot_vars_loss,
                                      'epoch',
                                      file_name='loss.png'))
            trainer.extend(
                extensions.PlotReport(plot_vars_acc,
                                      'epoch',
                                      file_name='accuracy.png'))

        # print selected entries of the log to stdout
        # here "main" refers to the target link of the "main" optimizer again, and
        # "validation" refers to the default name of the Evaluator extension.
        # entries other than 'epoch' are reported by the Classifier link, called by
        # either the updater or the evaluator.
        if args.use_validation:
            print_report_vars = [
                'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
                'validation/main/accuracy', 'elapsed_time'
            ]
        else:
            print_report_vars = [
                'epoch', 'main/loss', 'main/accuracy', 'elapsed_time'
            ]
        trainer.extend(extensions.PrintReport(print_report_vars))

        # print a progress bar to stdout
        # trainer.extend(extensions.ProgressBar())

        if args.resume:
            # Resume from a snapshot
            chainer.serializers.load_npz(args.resume, trainer)

        # Run the training
        trainer.run()

        # if the max epoch was not reached, the early stopping trigger ended training
        # (the validation loss stopped improving), so reload the last saved model
        if updater.epoch_detail < ts['epoch']:
            chainer.serializers.load_npz("{}/{}/model_tmp".format(args.out, i),
                                         model_cls)

        # remove temporary model from this training stage
        os.remove("{}/{}/model_tmp".format(args.out, i))

    # save the final model
    chainer.serializers.save_npz("{}/model".format(args.out), model_cls)
    if args.train_fold is not None:
        chainer.serializers.save_npz(
            str(
                Path(args.fold_model_dir,
                     args.fold_network_pattern.format(args.train_fold))),
            model_cls)
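The stage loop above pairs an early-stopping trigger with a reload of the last saved model when training stops early. A minimal sketch of the same idea using Chainer's built-in chainer.training.triggers.EarlyStoppingTrigger (whose signature differs from the project-local trigger class used above), assuming an `updater` built as in the example and the standard 'validation/main/loss' key reported by extensions.Evaluator:

from chainer import training
from chainer.training import triggers

stop_trigger = triggers.EarlyStoppingTrigger(
    monitor='validation/main/loss',  # metric checked after each evaluation
    check_trigger=(1, 'epoch'),      # how often the condition is checked
    patients=3,                      # stop after 3 checks without improvement
    mode='min',                      # lower loss is better
    max_trigger=(100, 'epoch'))      # hard cap on training length
trainer = training.Trainer(updater, stop_trigger, out='result')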
Example 14
def main():
    parser = argparse.ArgumentParser(description='training mnist')
    parser.add_argument('--gpu',
                        '-g',
                        default=-1,
                        type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--seed',
                        '-s',
                        type=int,
                        default=0,
                        help='Random seed')
    parser.add_argument('--n_fold',
                        '-nf',
                        type=int,
                        default=5,
                        help='n_fold cross validation')
    parser.add_argument('--fold', '-f', type=int, default=1)
    parser.add_argument('--out_dir_name',
                        '-dn',
                        type=str,
                        default=None,
                        help='Name of the output directory')
    parser.add_argument('--report_trigger',
                        '-rt',
                        type=str,
                        default='1e',
                        help='Interval for reporting (e.g. 100i; default: 1e)')
    parser.add_argument('--save_trigger',
                        '-st',
                        type=str,
                        default='1e',
                        help='Interval for saving the model '
                        '(e.g. 100i; default: 1e)')
    parser.add_argument('--load_model',
                        '-lm',
                        type=str,
                        default=None,
                        help='Path of the model object to load')
    parser.add_argument('--load_optimizer',
                        '-lo',
                        type=str,
                        default=None,
                        help='Path of the optimizer object to load')
    args = parser.parse_args()

    if args.out_dir_name is None:
        start_time = datetime.now()
        out_dir = Path('output/{}'.format(start_time.strftime('%Y%m%d_%H%M')))
    else:
        out_dir = Path('output/{}'.format(args.out_dir_name))

    random.seed(args.seed)
    np.random.seed(args.seed)
    cupy.random.seed(args.seed)
    chainer.config.cudnn_deterministic = True

    # model = ModifiedClassifier(SEResNeXt50())
    # model = ModifiedClassifier(SERes2Net50())
    model = ModifiedClassifier(SEResNeXt101())

    if args.load_model is not None:
        serializers.load_npz(args.load_model, model)

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    optimizer = optimizers.MomentumSGD(lr=0.1, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(1e-4))
    if args.load_optimizer is not None:
        serializers.load_npz(args.load_optimizer, optimizer)

    n_fold = args.n_fold
    slices = [slice(i, None, n_fold) for i in range(n_fold)]
    fold = args.fold - 1

    # model1
    # augmentation = [
    #     ('Rotate', {'p': 0.8, 'limit': 5}),
    #     ('PadIfNeeded', {'p': 0.5, 'min_height': 28, 'min_width': 30}),
    #     ('PadIfNeeded', {'p': 0.5, 'min_height': 30, 'min_width': 28}),
    #     ('Resize', {'p': 1.0, 'height': 28, 'width': 28}),
    #     ('RandomScale', {'p': 1.0, 'scale_limit': 0.1}),
    #     ('PadIfNeeded', {'p': 1.0, 'min_height': 32, 'min_width': 32}),
    #     ('RandomCrop', {'p': 1.0, 'height': 28, 'width': 28}),
    #     ('Mixup', {'p': 0.5}),
    #     ('Cutout', {'p': 0.5, 'num_holes': 4, 'max_h_size': 4,
    #                 'max_w_size': 4}),
    # ]
    # resize = None

    # model2
    augmentation = [
        ('Rotate', {
            'p': 0.8,
            'limit': 5
        }),
        ('PadIfNeeded', {
            'p': 0.5,
            'min_height': 28,
            'min_width': 32
        }),
        ('PadIfNeeded', {
            'p': 0.5,
            'min_height': 32,
            'min_width': 28
        }),
        ('Resize', {
            'p': 1.0,
            'height': 32,
            'width': 32
        }),
        ('RandomScale', {
            'p': 1.0,
            'scale_limit': 0.1
        }),
        ('PadIfNeeded', {
            'p': 1.0,
            'min_height': 36,
            'min_width': 36
        }),
        ('RandomCrop', {
            'p': 1.0,
            'height': 32,
            'width': 32
        }),
        ('Mixup', {
            'p': 0.5
        }),
        ('Cutout', {
            'p': 0.5,
            'num_holes': 4,
            'max_h_size': 4,
            'max_w_size': 4
        }),
    ]
    resize = [('Resize', {'p': 1.0, 'height': 32, 'width': 32})]

    train_data = KMNIST(augmentation=augmentation,
                        drop_index=slices[fold],
                        pseudo_labeling=True)
    valid_data = KMNIST(augmentation=resize, index=slices[fold])

    train_iter = iterators.SerialIterator(train_data, args.batchsize)
    valid_iter = iterators.SerialIterator(valid_data,
                                          args.batchsize,
                                          repeat=False,
                                          shuffle=False)

    updater = StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = Trainer(updater, (args.epoch, 'epoch'), out=out_dir)

    report_trigger = (int(args.report_trigger[:-1]), 'iteration'
                      if args.report_trigger[-1] == 'i' else 'epoch')
    trainer.extend(extensions.LogReport(trigger=report_trigger))
    trainer.extend(extensions.Evaluator(valid_iter, model, device=args.gpu),
                   name='val',
                   trigger=report_trigger)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'main/accuracy', 'val/main/loss',
        'val/main/accuracy', 'elapsed_time'
    ]),
                   trigger=report_trigger)
    trainer.extend(
        extensions.PlotReport(['main/loss', 'val/main/loss'],
                              x_key=report_trigger[1],
                              marker='.',
                              file_name='loss.png',
                              trigger=report_trigger))
    trainer.extend(
        extensions.PlotReport(['main/accuracy', 'val/main/accuracy'],
                              x_key=report_trigger[1],
                              marker='.',
                              file_name='accuracy.png',
                              trigger=report_trigger))

    save_trigger = (int(args.save_trigger[:-1]),
                    'iteration' if args.save_trigger[-1] == 'i' else 'epoch')
    trainer.extend(extensions.snapshot_object(
        model,
        filename='model_{0}-{{.updater.{0}}}.npz'.format(save_trigger[1])),
                   trigger=save_trigger)
    trainer.extend(extensions.snapshot_object(
        optimizer,
        filename='optimizer_{0}-{{.updater.{0}}}.npz'.format(save_trigger[1])),
                   trigger=save_trigger)
    trainer.extend(extensions.ProgressBar())
    trainer.extend(CosineAnnealing(lr_max=0.1, lr_min=1e-6, T_0=20),
                   trigger=(1, 'epoch'))

    best_model_trigger = triggers.MaxValueTrigger('val/main/accuracy',
                                                  trigger=(1, 'epoch'))
    trainer.extend(extensions.snapshot_object(model,
                                              filename='best_model.npz'),
                   trigger=best_model_trigger)
    trainer.extend(extensions.snapshot_object(optimizer,
                                              filename='best_optimizer.npz'),
                   trigger=best_model_trigger)
    best_loss_model_trigger = triggers.MinValueTrigger('val/main/loss',
                                                       trigger=(1, 'epoch'))
    trainer.extend(extensions.snapshot_object(model,
                                              filename='best_loss_model.npz'),
                   trigger=best_loss_model_trigger)
    trainer.extend(extensions.snapshot_object(
        optimizer, filename='best_loss_optimizer.npz'),
                   trigger=best_loss_model_trigger)

    if out_dir.exists():
        shutil.rmtree(out_dir)
    out_dir.mkdir()

    # Write parameters text
    with open(out_dir / 'train_params.txt', 'w') as f:
        f.write('model: {}\n'.format(model.predictor.__class__.__name__))
        f.write('n_epoch: {}\n'.format(args.epoch))
        f.write('batch_size: {}\n'.format(args.batchsize))
        f.write('n_data_train: {}\n'.format(len(train_data)))
        f.write('n_data_val: {}\n'.format(len(valid_data)))
        f.write('seed: {}\n'.format(args.seed))
        f.write('n_fold: {}\n'.format(args.n_fold))
        f.write('fold: {}\n'.format(args.fold))
        f.write('augmentation: \n')
        for process, param in augmentation:
            f.write('  {}: {}\n'.format(process, param))

    trainer.run()
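The report/save trigger strings above ('1e', '100i') are parsed inline into (count, unit) tuples; a small helper with the same assumed semantics (a number followed by 'i' means iterations, anything else means epochs) would be:

def parse_trigger(spec):
    """Parse '100i' into (100, 'iteration') and '5e' into (5, 'epoch')."""
    unit = 'iteration' if spec[-1] == 'i' else 'epoch'
    return int(spec[:-1]), unit

# e.g. trainer.extend(extensions.LogReport(trigger=parse_trigger('1e')))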
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--batchsize', type=int, default=2)
    parser.add_argument('--epoch', type=int, default=10)
    parser.add_argument('--mini', action="store_true")
    parser.add_argument('--input_size', type=int, default=512)
    args = parser.parse_args()

    dtype = np.float32

    num_class = len(voc_bbox_label_names)

    data_augmentation_transform = DataAugmentationTransform(args.input_size)
    center_detection_transform = CenterDetectionTransform(args.input_size,
                                                          num_class,
                                                          4,
                                                          dtype=dtype)

    train = TransformDataset(
        ConcatenatedDataset(VOCBboxDataset(year='2007', split='trainval'),
                            VOCBboxDataset(year='2012', split='trainval')),
        data_augmentation_transform)
    train = TransformDataset(train, center_detection_transform)
    if args.mini:
        train = datasets.SubDataset(train, 0, 100)
    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)

    test = VOCBboxDataset(year='2007',
                          split='test',
                          use_difficult=True,
                          return_difficult=True)
    if args.mini:
        test = datasets.SubDataset(test, 0, 20)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    detector = CenterDetector(HourglassNet,
                              args.input_size,
                              num_class,
                              dtype=dtype)
    #detector = CenterDetector(SimpleCNN, args.input_size, num_class)
    train_chain = CenterDetectorTrain(detector, 1, 0.1, 1)
    #train_chain = CenterDetectorTrain(detector, 1, 0, 0)

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        train_chain.to_gpu(args.gpu)

    optimizer = Adam(alpha=1.25e-4)
    #optimizer = SGD()
    optimizer.setup(train_chain)

    updater = StandardUpdater(train_iter, optimizer, device=args.gpu)

    log_interval = 1, 'epoch'
    log_interval_mini = 500, 'iteration'
    trainer = Trainer(updater, (args.epoch, 'epoch'), out=f"result{args.gpu}")
    trainer.extend(extensions.LogReport(trigger=log_interval_mini))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch',
        'iteration',
        'lr',
        'main/loss',
        'main/hm_loss',
        'main/wh_loss',
        'main/offset_loss',
        'main/hm_mae',
        'main/hm_pos_loss',
        'main/hm_neg_loss',
        'validation/main/map',
    ]),
                   trigger=log_interval_mini)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(DetectionVOCEvaluator(test_iter,
                                         detector,
                                         use_07_metric=True,
                                         label_names=voc_bbox_label_names),
                   trigger=(1, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        detector, 'detector{.updater.epoch:03}.npz'),
                   trigger=(1, 'epoch'))

    trainer.run()
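Snapshots written by extensions.snapshot_object above can be restored later with chainer.serializers; a minimal sketch, assuming the same constructor arguments used at training time (CenterDetector and HourglassNet are the project-local classes used above, and the snapshot path is hypothetical, following the naming scheme from the example):

import numpy as np
import chainer
from chainercv.datasets import voc_bbox_label_names

num_class = len(voc_bbox_label_names)
detector = CenterDetector(HourglassNet, 512, num_class, dtype=np.float32)
# hypothetical path: out dir "result-1" (gpu=-1), snapshot from epoch 10
chainer.serializers.load_npz('result-1/detector010.npz', detector)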
Example 16
def main(args):
    args = prepare_log_dir(args)

    # set dtype for training
    chainer.global_config.dtype = args.dtype

    train_dataset = BaseImageDataset(
        args.train_file,
        args.image_size,
        root=os.path.dirname(args.train_file),
        dtype=chainer.get_dtype(),
    )

    validation_dataset = BaseImageDataset(
        args.val_file,
        args.image_size,
        root=os.path.dirname(args.val_file),
        dtype=chainer.get_dtype(),
    )

    train_iter = MultiprocessIterator(train_dataset, batch_size=args.batch_size, shuffle=True)
    validation_iter = MultiprocessIterator(validation_dataset, batch_size=args.batch_size, repeat=False)

    net = HandwritingNet()
    model = L.Classifier(net, label_key='has_text')

    tensorboard_handle = SummaryWriter(log_dir=args.log_dir)

    optimizer = Adam(alpha=args.learning_rate)
    optimizer.setup(model)
    if args.save_gradient_information:
        optimizer.add_hook(
            TensorboardGradientPlotter(tensorboard_handle, args.log_interval),
        )

    # log training information every time we encounter a new epoch or every args.log_interval iterations
    log_interval_trigger = (
        lambda trainer:
        (trainer.updater.is_new_epoch or trainer.updater.iteration % args.log_interval == 0)
        and trainer.updater.iteration > 0
    )

    updater = StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = Trainer(updater, (args.num_epoch, 'epoch'), out=args.log_dir)

    data_to_log = {
        'log_dir': args.log_dir,
        'image_size': args.image_size,
        # 'num_layers': args.num_layers,
        'keep_aspect_ratio': train_dataset.keep_aspect_ratio,
        'net': get_import_info(net),
    }

    for argument in filter(lambda x: not x.startswith('_'), dir(args)):
        data_to_log[argument] = getattr(args, argument)

    def backup_train_config(stats_cpu):
        iteration = stats_cpu.pop('iteration')
        epoch = stats_cpu.pop('epoch')
        elapsed_time = stats_cpu.pop('elapsed_time')

        for key, value in stats_cpu.items():
            tensorboard_handle.add_scalar(key, value, iteration)

        if iteration == args.log_interval:
            stats_cpu.update(data_to_log)

        stats_cpu.update({
            "epoch": epoch,
            "iteration": iteration,
            "elapsed_time": elapsed_time,
        })

    trainer.extend(
        extensions.snapshot_object(net, net.__class__.__name__ + '_{.updater.iteration}.npz'),
        trigger=lambda trainer: trainer.updater.is_new_epoch or trainer.updater.iteration % args.snapshot_interval == 0,
    )

    trainer.extend(
        extensions.snapshot(filename='trainer_snapshot', autoload=args.resume is not None),
        trigger=(args.snapshot_interval, 'iteration')
    )

    trainer.extend(
        TensorboardEvaluator(
            validation_iter,
            model,
            device=args.gpu,
            tensorboard_handle=tensorboard_handle
        ),
        trigger=(args.test_interval, 'iteration'),
    )

    logger = Logger(
        os.path.dirname(os.path.realpath(__file__)),
        args.log_dir,
        postprocess=backup_train_config,
        trigger=log_interval_trigger,
        exclusion_filters=['*logs*', '*.pyc', '__pycache__', '.git*'],
        resume=args.resume is not None,
    )

    trainer.extend(logger, trigger=log_interval_trigger)
    trainer.extend(
        extensions.PrintReport(
            ['epoch', 'iteration', 'main/loss', 'main/accuracy', 'validation/main/accuracy'],
            log_report='Logger',
        ),
        trigger=log_interval_trigger,
    )

    trainer.extend(extensions.ExponentialShift('alpha', 0.1, optimizer=optimizer), trigger=(10, 'epoch'))
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
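Chainer accepts any callable taking the trainer as a trigger, which is what the inline log_interval_trigger lambda above relies on; an equivalent named-function sketch (the 100-iteration interval is an illustrative value):

def every_epoch_or_n_iterations(n):
    """Build a trigger that fires on each new epoch or every n iterations."""
    def trigger(trainer):
        updater = trainer.updater
        return ((updater.is_new_epoch or updater.iteration % n == 0)
                and updater.iteration > 0)
    return trigger

# e.g. trainer.extend(some_extension, trigger=every_epoch_or_n_iterations(100))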
Example 17
def main(args):
    random.seed(0)
    np.random.seed(0)
    if args.gpu >= 0:
        cuda.get_device_from_id(args.gpu).use()
        cuda.cupy.random.seed(0)

    dataset, id2ene = load_dataset(args.dataset, args.features, args.redirects)
    print(f'# of examples in dataset: {len(dataset)}')

    def batch2tensors(batch, device):
        xp = cuda.cupy if device >= 0 else np

        xf = xp.zeros((len(batch), args.n_feature), dtype='f')
        xe = xp.zeros((len(batch), args.embed_size), dtype='f')
        t = xp.zeros((len(batch), len(id2ene)), dtype='i')

        for i, item in enumerate(batch):
            for feature_id in item['feature_ids']:
                if feature_id < args.n_feature:
                    xf[i, feature_id] = 1.0

            if item['embedding']:
                xe[i] = xp.array(item['embedding'], dtype='f')

            for ene_id in item['ene_ids']:
                t[i, ene_id] = 1

        x = xp.concatenate((xf, xe), axis=1)

        return x, t

    cv_datasets = get_cross_validation_datasets(dataset, args.cv)
    ys = []
    ts = []
    for split_idx, cv_dataset in enumerate(cv_datasets):
        print(f'cross validation ({split_idx + 1}/{len(cv_datasets)})')
        train, test = cv_dataset
        train_iter = SerialIterator(train, batch_size=args.batch)
        test_iter = SerialIterator(test,
                                   batch_size=args.batch,
                                   repeat=False,
                                   shuffle=False)

        model = ENEClassifier(in_size=args.n_feature + args.embed_size,
                              hidden_size=args.hidden_size,
                              out_size=len(id2ene))

        if args.gpu >= 0:
            model.to_gpu(args.gpu)

        optimizer = optimizers.Adam()
        optimizer.setup(model)
        updater = StandardUpdater(train_iter,
                                  optimizer,
                                  converter=batch2tensors,
                                  device=args.gpu)

        trainer = Trainer(updater, (args.epoch, 'epoch'), out=args.out_dir)
        trainer.extend(extensions.LogReport())
        trainer.extend(
            extensions.snapshot_object(
                model, filename='epoch_{.updater.epoch}.model'))
        trainer.extend(
            extensions.Evaluator(test_iter,
                                 model,
                                 converter=batch2tensors,
                                 device=args.gpu))
        trainer.extend(
            extensions.PrintReport(
                ['epoch', 'main/loss', 'validation/main/loss',
                 'elapsed_time']))
        trainer.extend(extensions.ProgressBar(update_interval=1))

        trainer.run()

        test_iter.reset()
        for batch in test_iter:
            x, t = batch2tensors(batch, device=args.gpu)
            with chainer.using_config('train', False):
                y = model.predict(x)

            ys.append(y)
            ts.append(t)

    y_all = F.concat(ys, axis=0)
    t_all = F.concat(ts, axis=0)

    prediction_matrix = (y_all.data >= 0.5).astype('f')
    reference_matrix = (t_all.data == 1).astype('f')
    accuracy_matrix = prediction_matrix * reference_matrix

    eb_pred = prediction_matrix.sum(
        axis=1)  # entity-based num. of predicted classes
    eb_ref = reference_matrix.sum(
        axis=1)  # entity-based num. of reference classes
    eb_acc = accuracy_matrix.sum(
        axis=1)  # entity-based num. of accurate classes

    eb_nopred = (eb_pred == 0.).astype('f')  # for avoiding zero-division
    eb_precision = (eb_acc / (eb_pred + eb_nopred)).mean()
    eb_recall = (eb_acc / eb_ref).mean()
    eb_f1 = (2 * eb_acc / (eb_pred + eb_ref)).mean()

    cb_pred = prediction_matrix.sum(
        axis=0)  # class-based num. of predicted examples
    cb_ref = reference_matrix.sum(
        axis=0)  # class-based num. of reference examples
    cb_acc = accuracy_matrix.sum(
        axis=0)  # class-based num. of accurate examples

    cb_nopred = (cb_pred == 0.).astype('f')  # for avoiding zero-division
    cb_macro_precision = (cb_acc / (cb_pred + cb_nopred)).mean()
    cb_macro_recall = (cb_acc / cb_ref).mean()
    cb_macro_f1 = (2 * cb_acc / (cb_pred + cb_ref)).mean()

    cb_micro_precision = cb_acc.sum() / cb_pred.sum()
    cb_micro_recall = cb_acc.sum() / cb_ref.sum()
    cb_micro_f1 = (2 * cb_acc.sum()) / (cb_pred.sum() + cb_ref.sum())

    print(f'Entity-based Precision:      {float(eb_precision):.2%}')
    print(f'Entity-based Recall:         {float(eb_recall):.2%}')
    print(f'Entity-based F1 score:       {float(eb_f1):.2%}')

    print(f'Class-based macro Precision: {float(cb_macro_precision):.2%}')
    print(f'Class-based macro Recall:    {float(cb_macro_recall):.2%}')
    print(f'Class-based macro F1 score:  {float(cb_macro_f1):.2%}')

    print(f'Class-based micro Precision: {float(cb_micro_precision):.2%}')
    print(f'Class-based micro Recall:    {float(cb_micro_recall):.2%}')
    print(f'Class-based micro F1 score:  {float(cb_micro_f1):.2%}')

    print('writing out classification results')
    with open(Path(args.out_dir) / 'classification_result.json', 'w') as fo:
        for i, item in tqdm(enumerate(dataset)):
            title = item['title']
            predicted_classes = [
                id2ene[j] for j, v in enumerate(prediction_matrix[i])
                if v == 1.0
            ]
            reference_classes = [
                id2ene[j] for j, v in enumerate(reference_matrix[i])
                if v == 1.0
            ]
            out = {
                'title': title,
                'prediction': predicted_classes,
                'reference': reference_classes
            }
            print(json.dumps(out, ensure_ascii=False), file=fo)
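The evaluation block above reduces precision, recall, and F1 to sums over 0/1 indicator matrices; a toy NumPy check of the micro-averaged scores, using made-up 2-example, 3-class matrices:

import numpy as np

pred = np.array([[1, 0, 1],
                 [0, 1, 0]], dtype='f')   # predicted class indicators
ref = np.array([[1, 0, 0],
                [0, 1, 1]], dtype='f')    # reference class indicators
acc = pred * ref                          # element-wise true positives

micro_p = acc.sum() / pred.sum()                      # 2 / 3
micro_r = acc.sum() / ref.sum()                       # 2 / 3
micro_f1 = 2 * acc.sum() / (pred.sum() + ref.sum())   # 2 / 3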
Example 18
def main():
    # TODO: cleanup and move to conf or remove conf
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "config",
        type=str,
        help=
        "Config file for Training params such as epochs, batch size, lr, etc.")
    parser.add_argument("model_name",
                        type=str,
                        help="The name under which the models will be saved")
    parser.add_argument(
        "dataset_dir",
        type=str,
        help="Directory where the images and the dataset description is stored"
    )
    parser.add_argument(
        "train_path",
        type=str,
        help="path to JSON file containing train set information")
    parser.add_argument(
        "test_path",
        type=str,
        help="path to JSON file containing test set information")
    parser.add_argument("-rs",
                        "--resnet-size",
                        type=int,
                        default="18",
                        help="Size of the used ResNet model")
    parser.add_argument("-ld",
                        "--log-dir",
                        type=str,
                        help="name of tensorboard logdir")
    parser.add_argument(
        "-ll",
        "--lossless",
        action="store_true",
        help="use lossless triplet loss instead of standard one")
    parser.add_argument(
        "-ce",
        "--ce-classifier",
        action="store_true",
        help="use a cross entropy classifier instead of triplet loss")
    parser.add_argument(
        "-llr",
        action="store_true",
        help="Evaluate triplets with log-likehood-ratios instead of kmeans/knn"
    )
    parser.add_argument("-eo",
                        "--eval-only",
                        type=str,
                        help="only evaluate the given model")
    args = parser.parse_args()

    ###################### INIT ############################
    resnet_size = args.resnet_size
    base_model = PooledResNet(resnet_size)

    # parse config file
    plot_loss = True
    config = configparser.ConfigParser()
    config.read(args.config)

    batch_size = int(config["TRAINING"]["batch_size"])
    epochs = int(config["TRAINING"]["epochs"])
    lr = float(config["TRAINING"]["lr"])
    gpu = config["TRAINING"]["gpu"]

    xp = cuda.cupy if int(gpu) >= 0 else np

    model_name = args.model_name

    # Init tensorboard writer
    if args.log_dir is not None:
        if args.eval_only is not None:
            log_dir = f"runs/{args.log_dir}_eval"
        else:
            log_dir = f"runs/{args.log_dir}"
        if os.path.exists(log_dir):
            user_input = input("Log dir not empty. Clear log dir? (y/N)")
            if user_input == "y":
                shutil.rmtree(log_dir)
        writer = SummaryWriter(log_dir)
    else:
        writer = SummaryWriter()
        log_dir = writer.logdir

    with open(os.path.join(writer.logdir, "args.log"), "w") as log_file:
        log_file.write(f"{' '.join(sys.argv[1:])}\n")
    shutil.copy(args.config, writer.logdir)

    print("MODEL_NAME:", model_name, "BATCH_SIZE:", str(batch_size), "EPOCHS:",
          str(epochs))

    #################### Train and Save Model ########################################
    if args.ce_classifier:
        train, test, classes = load_dataset(args)

        # convert labels from string to int
        label_map = {label: i for i, label in enumerate(classes)}
        train = [(sample, label_map[label]) for sample, label in train]
        test = [(sample, label_map[label]) for sample, label in test]

        train_iter = SerialIterator(train,
                                    batch_size,
                                    repeat=True,
                                    shuffle=True)
        test_iter = SerialIterator(test,
                                   batch_size,
                                   repeat=False,
                                   shuffle=False)

        model = CrossEntropyClassifier(base_model, len(classes))

        if int(gpu) >= 0:
            backend.get_device(gpu).use()
            base_model.to_gpu()
            model.to_gpu()

        optimizer = optimizers.Adam(alpha=lr)
        optimizer.setup(model)

        updater = StandardUpdater(train_iter, optimizer, device=gpu)
        evaluator = CEEvaluator(test_iter, model, device=gpu)
    else:
        ### load dataset
        train_triplet, train_samples, train_labels, test_triplet, test_samples, test_labels = load_triplet_dataset(
            args)

        # choose the triplet loss variant (the lossless variant performed noticeably worse)
        if args.lossless:
            model = LosslessClassifier(base_model)
        else:
            model = StandardClassifier(base_model)

        ### Initialise triple loss model
        train_iter = TripletIterator(train_triplet,
                                     batch_size=batch_size,
                                     repeat=True,
                                     xp=xp)
        test_iter = TripletIterator(test_triplet, batch_size=batch_size, xp=xp)

        if int(gpu) >= 0:
            backend.get_device(gpu).use()
            base_model.to_gpu()
            model.to_gpu()

        optimizer = optimizers.Adam(alpha=lr)
        optimizer.setup(model)

        updater = triplet.Updater(train_iter, optimizer, device=gpu)
        evaluator = triplet.Evaluator(test_iter, model, device=gpu)

    if args.eval_only is None:
        trainer = get_trainer(updater, evaluator, epochs)
        if plot_loss:
            trainer.extend(
                extensions.PlotReport(["main/loss", "validation/main/loss"],
                                      "epoch",
                                      file_name=f"{model_name}_loss.png"))
        trainer.extend(
            extensions.snapshot(serializers.save_npz,
                                filename=model_name +
                                "_full_{0.updater.epoch:03d}.npz",
                                target=model))
        best_model_name = model_name + "_full_best.npz"
        trainer.extend(extensions.snapshot(serializers.save_npz,
                                           filename=best_model_name,
                                           target=model),
                       trigger=triggers.BestValueTrigger(
                           "validation/main/loss",
                           lambda best, new: new < best))

        if not args.ce_classifier:
            cluster_dir = os.path.join(writer.logdir, "cluster_imgs")
            os.makedirs(cluster_dir, exist_ok=True)
            trainer.extend(ClusterPlotter(base_model, test_labels,
                                          test_samples, batch_size, xp,
                                          cluster_dir),
                           trigger=(1, "epoch"))

        # trainer.extend(VisualBackprop(test_triplet[0], test_labels[0], base_model, [["visual_backprop_anchors"]], xp), trigger=(1, "epoch"))
        # trainer.extend(VisualBackprop(test_triplet[2], test_labels[2], base_model, [["visual_backprop_anchors"]], xp), trigger=(1, "epoch"))

        trainer.run()

        # serializers.save_npz(os.path.join(writer.logdir, model_name + "_base.npz"), base_model)

        for file in glob.glob(f"result/{model_name}*"):
            shutil.move(file, writer.logdir)
        best_model_path = os.path.join(writer.logdir, best_model_name)
    else:
        best_model_path = args.eval_only

    #################### Evaluation ########################################

    serializers.load_npz(best_model_path, model)
    if args.ce_classifier:
        metrics = evaluate_ce(model, test, batch_size, label_map, xp)
    elif args.llr:
        metrics = evaluate_triplet_with_llr(train_samples, train_labels,
                                            test_samples, test_labels, log_dir,
                                            model, batch_size, xp)
    else:
        metrics = evaluate_triplet(model, train_samples, train_labels,
                                   test_samples, test_labels, batch_size,
                                   writer, xp)

    with open(os.path.join(writer.logdir, "metrics.log"), "w") as log_file:
        json.dump(metrics, log_file, indent=4)

    print("Done")
    # sys.exit(0)
    os._exit(0)
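The BestValueTrigger with a `new < best` comparator used above is equivalent to the shorthand triggers.MinValueTrigger; a sketch of the shorter form:

from chainer.training import triggers

best_loss_trigger = triggers.MinValueTrigger('validation/main/loss',
                                             trigger=(1, 'epoch'))
# trainer.extend(extensions.snapshot(serializers.save_npz,
#                                    filename=best_model_name, target=model),
#                trigger=best_loss_trigger)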
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--batchsize', type=int, default=4)
    parser.add_argument('--epoch', type=int, default=10)
    parser.add_argument('--mini', action="store_true")
    args = parser.parse_args()

    if hasattr(multiprocessing, 'set_start_method'):
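        # assumption: starting (and joining) a dummy process here forces the
        # 'forkserver' start method to initialise before any CUDA/NCCL state is
        # created, so workers spawned later do not inherit a CUDA context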
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    comm = chainermn.create_communicator('pure_nccl')
    print(comm.size)

    device = comm.intra_rank

    num_class = len(voc_bbox_label_names)

    data_augmentation_transform = DataAugmentationTransform(512)
    center_detection_transform = CenterDetectionTransform(512, num_class, 4)

    train = TransformDataset(
        ConcatenatedDataset(VOCBboxDataset(year='2007', split='trainval'),
                            VOCBboxDataset(year='2012', split='trainval')),
        data_augmentation_transform)

    if comm.rank == 0:
        train = TransformDataset(train, center_detection_transform)
        if args.mini:
            train = datasets.SubDataset(train, 0, 100)
    else:
        train = None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    train_iter = chainer.iterators.MultiprocessIterator(train,
                                                        args.batchsize //
                                                        comm.size,
                                                        n_processes=2)

    if comm.rank == 0:
        test = VOCBboxDataset(year='2007',
                              split='test',
                              use_difficult=True,
                              return_difficult=True)
        if args.mini:
            test = datasets.SubDataset(test, 0, 20)
        test_iter = chainer.iterators.SerialIterator(test,
                                                     args.batchsize,
                                                     repeat=False,
                                                     shuffle=False)

    detector = CenterDetector(HourglassNet, 512, num_class)
    train_chain = CenterDetectorTrain(detector, 1, 0.1, 1, comm=comm)

    chainer.cuda.get_device_from_id(device).use()
    train_chain.to_gpu()

    optimizer = chainermn.create_multi_node_optimizer(Adam(amsgrad=True), comm)
    optimizer.setup(train_chain)

    updater = StandardUpdater(train_iter, optimizer, device=device)

    trainer = Trainer(updater, (args.epoch, 'epoch'))

    if comm.rank == 0:
        log_interval = 1, 'epoch'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'epoch',
            'iteration',
            'lr',
            'main/loss',
            'main/hm_loss',
            'main/wh_loss',
            'main/offset_loss',
            'validation/main/map',
        ]),
                       trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))
        trainer.extend(DetectionVOCEvaluator(test_iter,
                                             detector,
                                             use_07_metric=True,
                                             label_names=voc_bbox_label_names),
                       trigger=(1, 'epoch'))
        trainer.extend(extensions.snapshot_object(
            detector, 'detector{.updater.epoch:03}.npz'),
                       trigger=(1, 'epoch'))

    trainer.run()
Example 20
def train(model_class,
          n_base_units,
          trained_model,
          no_obj_weight,
          data,
          result_dir,
          initial_batch_size=10,
          max_batch_size=1000,
          max_epoch=100):
    train_x, train_y, val_x, val_y = data

    max_class_id = 0
    for objs in val_y:
        for obj in objs:
            max_class_id = max(max_class_id, obj[4])
    n_classes = max_class_id + 1

    class_weights = [1.0 for i in range(n_classes)]
    class_weights[0] = no_obj_weight
    train_dataset = YoloDataset(train_x,
                                train_y,
                                target_size=model_class.img_size,
                                n_grid=model_class.n_grid,
                                augment=True,
                                class_weights=class_weights)
    test_dataset = YoloDataset(val_x,
                               val_y,
                               target_size=model_class.img_size,
                               n_grid=model_class.n_grid,
                               augment=False,
                               class_weights=class_weights)

    model = model_class(n_classes, n_base_units)
    model.loss_calc = LossCalculator(n_classes, class_weights=class_weights)

    last_result_file = os.path.join(result_dir, 'best_loss.npz')
    if os.path.exists(last_result_file):
        try:
            chainer.serializers.load_npz(last_result_file, model)
            print('this training has already finished; reusing the result')
            return model
        except Exception:
            # fall back to retraining if the saved snapshot cannot be loaded
            pass

    if trained_model:
        print('copy params from trained model')
        copy_params(trained_model, model)

    optimizer = Adam()
    optimizer.setup(model)

    n_physical_cpu = int(math.ceil(multiprocessing.cpu_count() / 2))

    train_iter = MultiprocessIterator(train_dataset,
                                      batch_size=initial_batch_size,
                                      n_prefetch=n_physical_cpu,
                                      n_processes=n_physical_cpu)
    test_iter = MultiprocessIterator(test_dataset,
                                     batch_size=initial_batch_size,
                                     shuffle=False,
                                     repeat=False,
                                     n_prefetch=n_physical_cpu,
                                     n_processes=n_physical_cpu)
    updater = StandardUpdater(train_iter, optimizer, device=0)
    stopper = triggers.EarlyStoppingTrigger(check_trigger=(1, 'epoch'),
                                            monitor="validation/main/loss",
                                            patients=10,
                                            mode="min",
                                            max_trigger=(max_epoch, "epoch"))
    trainer = Trainer(updater, stopper, out=result_dir)

    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(extensions.Evaluator(test_iter, model, device=0))
    trainer.extend(
        extensions.PrintReport([
            'epoch',
            'main/loss',
            'validation/main/loss',
            'main/cl_loss',
            'validation/main/cl_loss',
            'main/cl_acc',
            'validation/main/cl_acc',
            'main/pos_loss',
            'validation/main/pos_loss',
        ]))
    trainer.extend(extensions.snapshot_object(model, 'best_loss.npz'),
                   trigger=triggers.MinValueTrigger('validation/main/loss'))
    trainer.extend(extensions.snapshot_object(model,
                                              'best_classification.npz'),
                   trigger=triggers.MaxValueTrigger('validation/main/cl_acc'))
    trainer.extend(
        extensions.snapshot_object(model, 'best_position.npz'),
        trigger=triggers.MinValueTrigger('validation/main/pos_loss'))
    trainer.extend(extensions.snapshot_object(model, 'model_last.npz'),
                   trigger=(1, 'epoch'))
    trainer.extend(AdaptiveBatchsizeIncrement(maxsize=max_batch_size),
                   trigger=(1, 'epoch'))

    trainer.run()

    chainer.serializers.load_npz(os.path.join(result_dir, 'best_loss.npz'),
                                 model)
    return model
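copy_params above is a project-local helper that is not shown here; a hedged sketch of what such a Chainer parameter-copying helper typically looks like, assuming it matches parameters by name and shape:

def copy_params(src, dst):
    """Copy parameters with matching names and shapes from src link to dst link."""
    src_params = dict(src.namedparams())
    for name, param in dst.namedparams():
        src_param = src_params.get(name)
        if (src_param is not None and src_param.data is not None
                and param.data is not None
                and src_param.shape == param.shape):
            param.copydata(src_param)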