def do_training(self, model, x_train, y_train):
        params = self.params
        # optimizer = chainer.optimizers.SGD()
        if params["nb_gpus"] == 1:
            import cupy
            id_device = params["gpus"][0]
            cupy.cuda.Device(id_device).use()
            model.to_gpu(id_device)
        optimizer = chainer.optimizers.MomentumSGD(lr=0.001, momentum=0.95)
        optimizer.setup(model)
        for id_epoch in range(self.params["nb_epoch"]):
            print("epoch ", id_epoch)
            for data, target in zip(x_train, y_train):
                if self.params["nb_gpus"] == 1:
                    # TODO: option for on-core training
                    data = cupy.array(data)
                    target = cupy.array(target)
                pred = model.predictor(data)
                loss = F.softmax_cross_entropy(pred, target)
                model.cleargrads()
                loss.backward()
                optimizer.update()
        # NOTE: the Trainer-based path below is left unreachable by this early
        # return; it is kept as an alternative implementation.
        return

        # using Chainer's native iterators
        x_train = x_train.reshape((x_train.shape[0] * x_train.shape[1], ) +
                                  x_train.shape[2:])
        y_train = y_train.reshape((y_train.shape[0] * y_train.shape[1], ))
        train = chainer.datasets.tuple_dataset.TupleDataset(x_train, y_train)
        if params["nb_gpus"] == 0:
            train_iter = chainer.iterators.SerialIterator(
                train,
                batch_size=params["batch_size"],
                repeat=True,
                shuffle=False)
        else:
            train_iter = chainer.iterators.MultiprocessIterator(
                train,
                batch_size=params["batch_size"],
                repeat=True,
                shuffle=True,
                n_processes=4)
        if params["nb_gpus"] == 0:
            updater = training.StandardUpdater(train_iter, optimizer)
        else:
            if params["nb_gpus"] == 1:
                updater = training.StandardUpdater(train_iter,
                                                   optimizer,
                                                   device=id_device)
            else:
                dic_devices = {str(i): i for i in params["gpus"][1:]}
                dic_devices["main"] = params["gpus"][0]
                updater = training.ParallelUpdater(train_iter,
                                                   optimizer,
                                                   devices=dic_devices)

        trainer = training.Trainer(updater, (self.params["nb_epoch"], 'epoch'),
                                   out='/tmp/result')
        trainer.extend(extensions.LogReport())
        trainer.extend(
            extensions.PrintReport(
                ['epoch', 'main/loss', 'main/accuracy', "elapsed_time"]))
        trainer.run()
def prepare_updater(self, train_it, optimizer):
    if config.training_params.updater_type == 'standard':
        return training.StandardUpdater(
            train_it, optimizer, device=config.gpu)
    elif config.training_params.updater_type == 'parallel':
        return training.ParallelUpdater(
            train_it, optimizer, devices={'main': 1, 'second': 0})
    else:
        raise ValueError('unknown updater_type: {}'.format(
            config.training_params.updater_type))
Example #3
def main():
    model = VGGNet()
    model.train = True

    model = Classifier(model)
    model0 = copy.deepcopy(model)
    model1 = copy.deepcopy(model)
    model2 = copy.deepcopy(model)
    model3 = copy.deepcopy(model)
    model0.to_gpu(0)
    model1.to_gpu(1)
    model2.to_gpu(2)
    model3.to_gpu(3)
    ds = np.load(
        "/home/kaunildhruv/fbsource/fbcode/experimental/themachinist/ml/autoencoders/preprocess_ds.npz"
    )
    print("Dataset loaded.")
    train, test = tuple_dataset.TupleDataset(
        ds["train_img"],
        ds["train_lable"]), tuple_dataset.TupleDataset(ds["test_img"],
                                                       ds["test_lable"])

    train_iter = iterators.SerialIterator(train, batch_size=bs, shuffle=True)
    test_iter = iterators.SerialIterator(test,
                                         batch_size=bs,
                                         shuffle=False,
                                         repeat=False)
    optimizer = optimizers.Adam(alpha=0.001, beta1=0.9, beta2=0.999, eps=1e-08)
    optimizer.setup(model)

    updater = training.ParallelUpdater(train_iter,
                                       optimizer,
                                       devices={
                                           'main': 0,
                                           'first': 1,
                                           'second': 2,
                                           'third': 3
                                       })
    trainer = training.Trainer(updater, (epochs, 'epoch'), out='result')
    #trainer.extend(Evaluator(test_iter, model))
    trainer.extend(extensions.LogReport())
    interval = (5, 'epoch')
    iter_interval = (10000, 'iteration')

    trainer.extend(extensions.snapshot_object(model,
                                              'epoch-{.updater.epoch}.model'),
                   trigger=interval)
    trainer.extend(extensions.snapshot_object(
        model, 'iteration-{.updater.iteration}.model'),
                   trigger=iter_interval)
    trainer.extend(extensions.snapshot(), trigger=interval)

    trainer.extend(extensions.PrintReport(['epoch', 'main/loss']))
    trainer.extend(extensions.ProgressBar())
    trainer.run()
Example #4
def create_updater(train_iter, optimizer, devices):
    if HAVE_NCCL and len(devices) > 1:
        updater = training.updaters.MultiprocessParallelUpdater(
            train_iter, optimizer, devices=devices)
    elif len(devices) > 1:
        optimizer.lr /= len(devices)
        updater = training.ParallelUpdater(train_iter,
                                           optimizer,
                                           devices=devices)
    else:
        updater = training.StandardUpdater(train_iter,
                                           optimizer,
                                           device=devices['main'])
    return updater
def run_training(
        net, train, valid, result_dir, batchsize=64, devices=-1,
        training_epoch=300, initial_lr=0.05, lr_decay_rate=0.5,
        lr_decay_epoch=30, weight_decay=0.0005):
    # Iterator
    train_iter = iterators.MultiprocessIterator(train, batchsize)
    test_iter = iterators.MultiprocessIterator(valid, batchsize, False, False)

    # Model
    net = L.Classifier(net)

    # Optimizer
    optimizer = optimizers.MomentumSGD(lr=initial_lr)
    optimizer.setup(net)
    if weight_decay > 0:
        optimizer.add_hook(chainer.optimizer.WeightDecay(weight_decay))

    # Updater
    if isinstance(devices, int):
        # A single device id: wrap it in a dict so devices['main'] also works
        # for the Evaluator below.
        devices = {'main': devices}
        updater = training.StandardUpdater(
            train_iter, optimizer, device=devices['main'])
    elif isinstance(devices, dict):
        updater = training.ParallelUpdater(
            train_iter, optimizer, devices=devices)
    else:
        raise TypeError('devices must be an int or a dict of device ids')

    # 6. Trainer
    trainer = training.Trainer(
        updater, (training_epoch, 'epoch'), out=result_dir)

    # 7. Trainer extensions
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.observe_lr())
    trainer.extend(extensions.Evaluator(
        test_iter, net, device=devices['main']), name='val')
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'main/accuracy', 'val/main/loss',
         'val/main/accuracy', 'elapsed_time', 'lr']))
    trainer.extend(extensions.PlotReport(
        ['main/loss', 'val/main/loss'], x_key='epoch', file_name='loss.png'))
    trainer.extend(extensions.PlotReport(
        ['main/accuracy', 'val/main/accuracy'], x_key='epoch',
        file_name='accuracy.png'))
    trainer.extend(extensions.ExponentialShift(
        'lr', lr_decay_rate), trigger=(lr_decay_epoch, 'epoch'))
    trainer.extend(extensions.snapshot_object(net.predictor, 'model_{.updater.epoch}.npz'), trigger=(10, 'epoch'))
    trainer.run()

    return net
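For orientation, a minimal way to drive run_training might look like the sketch below; MNIST, the tiny MLP, and the single GPU id 0 are assumptions for illustration, not part of the original example.

# Hypothetical invocation of run_training (MNIST, the small MLP and GPU id 0
# are illustrative assumptions only).
import chainer
import chainer.functions as F
import chainer.links as L

train, valid = chainer.datasets.get_mnist()  # flattened 784-dim images
net = chainer.Sequential(L.Linear(784, 100), F.relu, L.Linear(100, 10))
trained_net = run_training(net, train, valid, result_dir='result_mnist',
                           batchsize=128, devices=0, training_epoch=5)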
Example #6
    def train(self, epoch_num=40, batch_size=128, gpu=-1):
        train = chainer.datasets.LabeledImageDataset(
            "../dataset/train/info.txt", "../dataset/train")
        test = chainer.datasets.LabeledImageDataset(
            "../dataset/validation/info.txt", "../dataset/validation")

        model = L.Classifier(
            Model(out_size=25))  # loss function, default softmax_cross_entropy
        alpha = 1e-4
        optimizer = optimizers.Adam(alpha=alpha)
        optimizer.setup(model)
        model.predictor.vgg.disable_update()  # not update weight of VGG16

        train = TransformDataset(train, self.transform)
        test = TransformDataset(test, self.transform)

        train_iter = chainer.iterators.SerialIterator(train, batch_size)
        test_iter = chainer.iterators.SerialIterator(test,
                                                     batch_size,
                                                     repeat=False,
                                                     shuffle=False)
        #updater = training.StandardUpdater(train_iter, optimizer, device=gpu)
        updater = training.ParallelUpdater(train_iter,
                                           optimizer,
                                           devices=self.gpu_devices)
        trainer = training.Trainer(updater, (epoch_num, 'epoch'), out='result')
        trainer.extend(
            extensions.Evaluator(test_iter,
                                 model,
                                 device=self.gpu_devices['main']))
        trainer.extend(extensions.LogReport())
        trainer.extend(
            extensions.PrintReport([
                'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
                'validation/main/accuracy', 'elapsed_time'
            ]))
        #trainer.extend(extensions.PlotReport(['main/loss', 'validation/main/loss'], 'epoch', file_name='loss.png'))
        #trainer.extend(extensions.PlotReport(['main/accuracy', 'validation/main/accuracy'], 'epoch', file_name='accuracy.png'))
        trainer.run()

        model.to_cpu()
        serializers.save_npz("mymodel.npz", model)
Example #7
def updater_creator(iterator, optimizer, devices, **kwargs):
    """A sample updater creator.

    An updater creator method should return an Updater object.
    Once an updater creator method is specified in the config YAML, the method
    receives the iterator object, the optimizer object, the device dictionary,
    and the "args" dictionary defined in the config YAML. You can build a
    custom Updater from those objects and return it.

    """
    if HAVE_NCCL and len(devices) > 1:
        updater = training.updaters.MultiprocessParallelUpdater(
            iterator, optimizer, devices=devices)
    elif len(devices) > 1:
        optimizer.lr /= len(devices)
        updater = training.ParallelUpdater(iterator,
                                           optimizer,
                                           devices=devices)
    else:
        updater = training.StandardUpdater(iterator,
                                           optimizer,
                                           device=devices['main'])
    return updater
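As a rough illustration of the contract described in the docstring, the creator can also be called directly; the CPU-only device mapping and the extra keyword argument below are hypothetical stand-ins for what the config framework would pass from the YAML "args" section.

# Direct call following the updater-creator contract (values are illustrative;
# 'some_extra_option' is a hypothetical entry from the config "args" section).
import chainer
import chainer.links as L
from chainer import iterators, optimizers

train, _ = chainer.datasets.get_mnist()
model = L.Classifier(L.Linear(784, 10))
optimizer = optimizers.MomentumSGD(lr=0.05)
optimizer.setup(model)
train_iter = iterators.SerialIterator(train, batch_size=64)

# A single CPU device ({'main': -1}) takes the StandardUpdater branch.
updater = updater_creator(train_iter, optimizer,
                          devices={'main': -1},
                          some_extra_option=True)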
Example #8
def main():
    args = parse_args()

    chainer.global_config.autotune = True
    #chainer.set_debug(True)

    # Set the random seeds
    random.seed(args.seed)
    np.random.seed(args.seed)

    # Set up Devices
    devices = utils.setup_devices(args.gpus)

    # Load model
    ext = os.path.splitext(args.model_file)[1]
    model_path = '.'.join(os.path.split(args.model_file)).replace(ext, '')
    model = import_module(model_path)
    model = getattr(model, args.model_name)(args.output_class)
    #model = L.Classifier(model)
    model.to_gpu()

    # create result dir
    result_dir = create_result_dir(args.model_name)
    shutil.copy(args.model_file,
                os.path.join(result_dir, os.path.basename(args.model_file)))
    with open(os.path.join(result_dir, 'args'), 'w') as fp:
        fp.write(json.dumps(vars(args)))
    print(json.dumps(vars(args), sort_keys=True, indent=4))

    # Create Dataset
    # Load the datasets and mean file
    mean = np.load(args.mean)
    train = ImagenetDataset(args.train_list, args.train_image)
    valid = ImagenetDataset(args.val_list, args.val_image)

    train_transform = partial(transform.food101_transform,
                              mean=mean,
                              random_angle=args.random_angle,
                              expand_ratio=args.expand_ratio,
                              crop_size=args.crop_size,
                              train=True)
    valid_transform = partial(transform.food101_transform,
                              mean=mean,
                              train=False)

    train = TransformDataset(train, train_transform)
    valid = TransformDataset(valid, valid_transform)

    # Create Iterator
    train_iter = chainer.iterators.MultiprocessIterator(train,
                                                        args.batchsize,
                                                        n_processes=4)
    val_iter = chainer.iterators.MultiprocessIterator(valid,
                                                      args.batchsize,
                                                      shuffle=False,
                                                      repeat=False,
                                                      n_processes=4)
    #train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    #val_iter = chainer.iterators.SerialIterator(valid, args.batchsize, repeat=False, shuffle=False)

    # Set Optimizer
    optimizer = optimizers.MomentumSGD(lr=args.initial_lr, momentum=0.9)
    optimizer.setup(model)
    if args.weight_decay > 0:
        optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))
    # optimizer.use_fp32_update()

    # Updater
    updater = training.ParallelUpdater(train_iter, optimizer, devices=devices)

    # Trainer
    trainer = training.Trainer(updater, (args.training_epoch, 'epoch'),
                               result_dir)

    # Trainer Extensions
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.observe_lr())
    trainer.extend(extensions.Evaluator(val_iter,
                                        model,
                                        device=devices['main']),
                   name='val')
    trainer.extend(extensions.ExponentialShift('lr', args.lr_decay_rate),
                   trigger=(args.lr_decay_epoch, 'epoch'))
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'main/accuracy', 'val/main/loss',
            'val/main/accuracy', 'elapsed_time', 'lr'
        ]))

    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'val/main/loss'],
                                  x_key='epoch',
                                  file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(['main/accuracy', 'val/main/accuracy'],
                                  x_key='epoch',
                                  file_name='accuracy.png'))

    # Print progress bar
    trainer.extend(extensions.ProgressBar())

    # Save the model which minimizes validation loss
    trainer.extend(extensions.snapshot_object(model, filename='bestmodel.npz'),
                   trigger=training.triggers.MinValueTrigger('val/main/loss'))

    trainer.run()
Example #9
def main():
    global args
    args = parse()
    args.guassian = True
    print('Gaussian state: ' + str(args.guassian))
    
    currentDT = datetime.datetime.now()
    args.out_dir += str(currentDT).split('.')[0]
    
    use_gpu = args.gpus[0] >= 0
    if len(args.gpus) > 1:
        # Name the first GPU 'main' and the remaining ones 'gpu<id>'
        # (names other than 'main' are arbitrary).
        gpus = {'main': args.gpus[0]}
        for gpu in args.gpus[1:]:
            gpus['gpu{}'.format(gpu)] = gpu
        args.gpus = gpus
        
    #Dataset
    train,test = get_dataset(args)
    mean = np.mean([x for x, _ in train], axis=(0, 2, 3))
    std = np.std([x for x, _ in train], axis=(0, 2, 3))

    #Iterators
    train_iter = ch.iterators.MultiprocessIterator(train, args.batchsize)
    test_iter = ch.iterators.MultiprocessIterator(test, args.batchsize, False, False)

    #net
    net = HandDetect.HandDetect()
    net = WeightedLoss.WeightedLoss(net,0.01,use_gpu)

    #Optimizer
    optimizer = optimizers.MomentumSGD(lr=args.lr, momentum=args.momentum)
    optimizer.setup(net)
    if args.weight_decay > 0:
        optimizer.add_hook(ch.optimizer.WeightDecay(args.weight_decay))

    #Updater
    if isinstance(args.gpus,dict):
        updater = training.ParallelUpdater(train_iter, optimizer, devices=args.gpus)
    else:
        updater = training.StandardUpdater(train_iter, optimizer, device=args.gpus[0])
        
    #Trainer
    trainer = training.Trainer(updater, (args.training_epoch, 'epoch'), out=args.out_dir)
    
    
    #Training extensions
    trainer.extend(extensions.Evaluator(test_iter, net, device=0))
    
    #trainer.extend(extensions.snapshot(), trigger=(20, 'epoch'))

    trainer.extend(extensions.LogReport())
    print("The PlotReport is " + str(extensions.PlotReport.available()))
    if extensions.PlotReport.available():
        trainer.extend( 
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch', file_name='accuracy.png'))

    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss', 'elapsed_time']))

    #Starting training process and save model
    print("Start Training")
    trainer.run()
    serializers.save_npz(args.out_dir+'/hand.model',net.predictor)
    
    #generate some heatmaps to judge the result
    os.mkdir(args.out_dir + '/generate')
    for j,(hand,_) in enumerate(train):
        hand=cuda.to_gpu(hand.reshape((1,3,224,224)),device=0)
        HM = net.predictor(hand)
        for i in range(5):
            t=cuda.to_cpu(HM[0,i,...].data)
            cv2.imwrite(args.out_dir + '/generate/%d_%d.png' % (j,i),t * 255)
    optimizer.setup(model)

    # Load the MNIST dataset
    train_iter = chainer.iterators.SerialIterator(train, args.batch_size)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batch_size,
                                                 repeat=False,
                                                 shuffle=False)

    # Set up a trainer
    device = 0 if num_gpus > 0 else -1  # -1 indicates CPU, 0 indicates first GPU device.
    if num_gpus > 0:
        updater = training.ParallelUpdater(
            train_iter,
            optimizer,
            # The device of the name 'main' is used as a "master", while others are
            # used as slaves. Names other than 'main' are arbitrary.
            devices={('main' if device == 0 else str(device)): device
                     for device in range(num_gpus)})
    else:
        updater = training.StandardUpdater(train_iter,
                                           optimizer,
                                           device=device)

    # Write output files to output_data_dir. These are zipped and uploaded to S3 output path as output.tar.gz.
    trainer = training.Trainer(updater, (args.epochs, 'epoch'),
                               out=args.output_dir)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=device))
Example #11
def main():
    parser = argparse.ArgumentParser(description='Chainer YOLOv3 Train')
    parser.add_argument('--names')
    parser.add_argument('--train')
    parser.add_argument('--valid', default='')
    parser.add_argument('--detection', default='')
    
    parser.add_argument('--batchsize', '-b', type=int, default=8)
    parser.add_argument('--iteration', '-i', type=int, default=50200)
    parser.add_argument('--gpus', '-g', type=int, nargs='*', default=[])
    parser.add_argument('--out', '-o', default='yolov3-result')
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--display_interval', type=int, default=100)
    parser.add_argument('--snapshot_interval', type=int, default=100)
    parser.add_argument('--ignore_thresh', type=float, default=0.5)
    parser.add_argument('--thresh', type=float, default=0.5)
    parser.add_argument('--darknet', default='')
    parser.add_argument('--darknet_class', type=int, default=-1)
    parser.add_argument('--steps', type=int, nargs='*', default=[-10200, -5200])
    parser.add_argument('--scales', type=float, nargs='*', default=[0.1, 0.1])
    args = parser.parse_args()
    
    print('GPUs: {}'.format(args.gpus))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# iteration: {}'.format(args.iteration))
    
    class_names = load_list(args.names)
    
    random.seed(args.seed)
    np.random.seed(args.seed)
    
    base = None
    if len(args.darknet) > 0:
        darknet_class = args.darknet_class if args.darknet_class > 0 else len(class_names)
        darknet53 = Darknet53(darknet_class)
        serializers.load_npz(args.darknet, darknet53)
        base = darknet53.base
    yolov3 = YOLOv3(len(class_names), base, ignore_thresh=args.ignore_thresh)
    model = YOLOv3Loss(yolov3)
    device = -1
    if len(args.gpus) > 0:
        device = args.gpus[0]
        cuda.cupy.random.seed(args.seed)
        cuda.get_device_from_id(args.gpus[0]).use()
    if len(args.gpus) == 1:
        model.to_gpu()
    
    optimizer = chainer.optimizers.MomentumSGD(lr=0.001)
    optimizer.setup(model)
    optimizer.add_hook(optimizer_hooks.WeightDecay(0.0005), 'hook_decay')
    optimizer.add_hook(optimizer_hooks.GradientClipping(10.0), 'hook_grad_clip')
    
    train = YOLODataset(args.train, train=True, classifier=False, 
                        jitter=0.3, hue=0.1, sat=1.5, val=1.5)
    #train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    
    
    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize,
                                                        shared_mem=(448**2*3+(1+4)*100)*4)
    
    if len(args.gpus) <= 1:
        updater = training.StandardUpdater(
            train_iter, optimizer, converter=concat_yolo, device=device)
    else:
        devices = {'main': args.gpus[0]}
        for gpu in args.gpus[1:]:
            devices['gpu{}'.format(gpu)] = gpu
        updater = training.ParallelUpdater(
            train_iter, optimizer, converter=concat_yolo, devices=devices)
    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), out=args.out)
    
    display_interval = (args.display_interval, 'iteration')
    snapshot_interval = (args.snapshot_interval, 'iteration')
    
    print_entries = ['epoch', 'iteration', 'main/loss', 'elapsed_time']
    plot_keys = ['main/loss']
    snapshot_key = 'main/loss'
    
    if len(args.valid) > 0:
        print_entries = ['epoch', 'iteration', 
             'main/loss', 'validation/main/loss', 'elapsed_time']
        plot_keys = ['main/loss', 'validation/main/loss']
        snapshot_key = 'validation/main/loss'
        
        test = YOLODataset(args.valid, train=False, classifier=False)
        test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                     repeat=False, shuffle=False)
        trainer.extend(extensions.Evaluator(
            test_iter, model, converter=concat_yolo, 
            device=device), trigger=display_interval)
    
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.LogReport(trigger=display_interval))
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                plot_keys, 'iteration',
                display_interval, file_name='loss.png'))
    
    trainer.extend(extensions.PrintReport(print_entries),
                  trigger=display_interval)
    trainer.extend(extensions.ProgressBar(update_interval=1))
    
    trainer.extend(extensions.snapshot_object(
        yolov3, 'yolov3_snapshot.npz'), 
        trigger=training.triggers.MinValueTrigger(
            snapshot_key, snapshot_interval))
    trainer.extend(extensions.snapshot_object(
        yolov3, 'yolov3_backup.npz'), 
        trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        yolov3, 'yolov3_final.npz'), 
        trigger=(args.iteration, 'iteration'))
    
    steps = args.steps
    for i in range(len(steps)):
        if steps[i] < 0:
            steps[i] = args.iteration + steps[i]
    scales = args.scales
    print('# steps: {}'.format(steps))
    print('# scales: {}'.format(scales))
        
    trainer.extend(DarknetShift(
        optimizer, 'steps', args.iteration, burn_in=1000,
        steps=steps, scales=scales
    ))
    trainer.extend(CropSizeUpdater(train, 
                                   [(10+i)*32 for i in range(0,5)],
                                   args.iteration - 200))
    
    if len(args.detection):
        detector = YOLOv3Predictor(yolov3, thresh=args.thresh)
        trainer.extend(YOLODetection(
            detector, 
            load_list(args.detection),
            class_names, (416, 416),args.thresh,
            trigger=display_interval, device=device
        ))
    
    print('')
    print('RUN')
    print('')
    trainer.run()
Example #12
def main():
    # Parse the arguments.
    args = parse_arguments()
    augment = args.augment != 'False'
    multi_gpu = args.multi_gpu != 'False'
    if args.label:
        labels = args.label
        class_num = len(labels) if isinstance(labels, list) else 1
    else:
        raise ValueError('No target label was specified.')

    # Dataset preparation. Postprocessing is required for the regression task.
    def postprocess_label(label_list):
        label_arr = np.asarray(label_list, dtype=np.int32)
        return label_arr

    # Apply a preprocessor to the dataset.
    logging.info('Preprocess train dataset and test dataset...')
    preprocessor = preprocess_method_dict[args.method]()
    parser = CSVFileParserForPair(preprocessor, postprocess_label=postprocess_label,
                                  labels=labels, smiles_cols=['smiles_1', 'smiles_2'])
    train = parser.parse(args.train_datafile)['dataset']
    valid = parser.parse(args.valid_datafile)['dataset']

    if augment:
        logging.info('Utilizing data augmentation in train set')
        train = augment_dataset(train)

    num_train = train.get_datasets()[0].shape[0]
    num_valid = valid.get_datasets()[0].shape[0]
    logging.info('Train/valid split: {}/{}'.format(num_train, num_valid))

    if len(args.net_hidden_dims):
        net_hidden_dims = tuple([int(net_hidden_dim) for net_hidden_dim in args.net_hidden_dims.split(',')])
    else:
        net_hidden_dims = ()
    fp_attention = bool(args.fp_attention)
    update_attention = bool(args.update_attention)
    weight_tying = args.weight_tying != 'False'
    attention_tying = args.attention_tying != 'False'
    fp_batch_normalization = args.fp_bn == 'True'
    layer_aggregator = None if args.layer_aggregator == '' else args.layer_aggregator
    context = args.context != 'False'
    output_activation = functions.relu if args.output_activation == 'relu' else None
    predictor = set_up_predictor(method=args.method,
                                 fp_hidden_dim=args.fp_hidden_dim, fp_out_dim=args.fp_out_dim, conv_layers=args.conv_layers,
                                 concat_hidden=args.concat_hidden, layer_aggregator=layer_aggregator,
                                 fp_dropout_rate=args.fp_dropout_rate, fp_batch_normalization=fp_batch_normalization,
                                 net_hidden_dims=net_hidden_dims, class_num=class_num,
                                 sim_method=args.sim_method, fp_attention=fp_attention, weight_typing=weight_tying, attention_tying=attention_tying,
                                 update_attention=update_attention,
                                 context=context, context_layers=args.context_layers, context_dropout=args.context_dropout,
                                 message_function=args.message_function, readout_function=args.readout_function,
                                 num_timesteps=args.num_timesteps, num_output_hidden_layers=args.num_output_hidden_layers,
                                 output_hidden_dim=args.output_hidden_dim, output_activation=output_activation,
                                 symmetric=args.symmetric
                                 )

    train_iter = SerialIterator(train, args.batchsize)
    test_iter = SerialIterator(valid, args.batchsize,
                              repeat=False, shuffle=False)

    metrics_fun = {'accuracy': F.binary_accuracy}
    classifier = Classifier(predictor, lossfun=F.sigmoid_cross_entropy,
                            metrics_fun=metrics_fun, device=args.gpu)

    # Set up the optimizer.
    optimizer = optimizers.Adam(alpha=args.learning_rate, weight_decay_rate=args.weight_decay_rate)
    # optimizer = optimizers.Adam()
    # optimizer = optimizers.SGD(lr=args.learning_rate)
    optimizer.setup(classifier)
    # add regularization
    if args.max_norm > 0:
        optimizer.add_hook(chainer.optimizer.GradientClipping(threshold=args.max_norm))
    if args.l2_rate > 0:
        optimizer.add_hook(chainer.optimizer.WeightDecay(rate=args.l2_rate))
    if args.l1_rate > 0:
        optimizer.add_hook(chainer.optimizer.Lasso(rate=args.l1_rate))

    # Set up the updater.
    if multi_gpu:
        logging.info('Using multiple GPUs')
        updater = training.ParallelUpdater(train_iter, optimizer, devices={'main': 0, 'second': 1},
                                           converter=concat_mols)
    else:
        logging.info('Using single GPU')
        updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu,
                                           converter=concat_mols)

    # Set up the trainer.
    logging.info('Training...')
    # add stop_trigger parameter
    early_stop = triggers.EarlyStoppingTrigger(monitor='validation/main/loss', patients=30, max_trigger=(500, 'epoch'))
    out = 'output' + '/' + args.out
    trainer = training.Trainer(updater, stop_trigger=early_stop, out=out)

    # trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(E.Evaluator(test_iter, classifier,
                               device=args.gpu, converter=concat_mols))

    train_eval_iter = SerialIterator(train, args.batchsize,
                                       repeat=False, shuffle=False)

    trainer.extend(AccuracyEvaluator(
        train_eval_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='train_acc',
        pos_labels=1, ignore_labels=-1, raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # instead extension name `val` is used.
    trainer.extend(AccuracyEvaluator(
        test_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='val_acc',
        pos_labels=1, ignore_labels=-1))

    trainer.extend(ROCAUCEvaluator(
        train_eval_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='train_roc',
        pos_labels=1, ignore_labels=-1, raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # instead extension name `val` is used.
    trainer.extend(ROCAUCEvaluator(
        test_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='val_roc',
        pos_labels=1, ignore_labels=-1))

    trainer.extend(PRCAUCEvaluator(
        train_eval_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='train_prc',
        pos_labels=1, ignore_labels=-1, raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # instead extension name `val` is used.
    trainer.extend(PRCAUCEvaluator(
        test_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='val_prc',
        pos_labels=1, ignore_labels=-1))

    trainer.extend(F1Evaluator(
        train_eval_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='train_f',
        pos_labels=1, ignore_labels=-1, raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # instead extension name `val` is used.
    trainer.extend(F1Evaluator(
        test_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='val_f',
        pos_labels=1, ignore_labels=-1))

    # apply shift strategy to learning rate every 10 epochs
    # trainer.extend(E.ExponentialShift('alpha', args.exp_shift_rate), trigger=(10, 'epoch'))
    if args.exp_shift_strategy == 1:
        trainer.extend(E.ExponentialShift('alpha', args.exp_shift_rate),
                       trigger=triggers.ManualScheduleTrigger([10, 20, 30, 40, 50, 60], 'epoch'))
    elif args.exp_shift_strategy == 2:
        trainer.extend(E.ExponentialShift('alpha', args.exp_shift_rate),
                       trigger=triggers.ManualScheduleTrigger([5, 10, 15, 20, 25, 30], 'epoch'))
    elif args.exp_shift_strategy == 3:
        trainer.extend(E.ExponentialShift('alpha', args.exp_shift_rate),
                       trigger=triggers.ManualScheduleTrigger([5, 10, 15, 20, 25, 30, 40, 50, 60, 70], 'epoch'))
    else:
        raise ValueError('No such strategy to adapt learning rate')
    # # observation of learning rate
    trainer.extend(E.observe_lr(), trigger=(1, 'iteration'))

    entries = [
        'epoch',
        'main/loss', 'train_acc/main/accuracy', 'train_roc/main/roc_auc', 'train_prc/main/prc_auc',
        # 'train_p/main/precision', 'train_r/main/recall',
        'train_f/main/f1',
        'validation/main/loss', 'val_acc/main/accuracy', 'val_roc/main/roc_auc', 'val_prc/main/prc_auc',
        # 'val_p/main/precision', 'val_r/main/recall',
        'val_f/main/f1',
        'lr',
        'elapsed_time']
    trainer.extend(E.PrintReport(entries=entries))
    # change from 10 to 2 on Mar. 1 2019
    trainer.extend(E.snapshot(), trigger=(2, 'epoch'))
    trainer.extend(E.LogReport())
    trainer.extend(E.ProgressBar())
    trainer.extend(E.PlotReport(['main/loss', 'validation/main/loss'], 'epoch', file_name='loss.png'))
    trainer.extend(E.PlotReport(['train_acc/main/accuracy', 'val_acc/main/accuracy'], 'epoch', file_name='accuracy.png'))

    if args.resume:
        resume_path = os.path.join(out, args.resume)
        logging.info('Resume training according to snapshot in {}'.format(resume_path))
        chainer.serializers.load_npz(resume_path, trainer)

    trainer.run()

    # Save the regressor's parameters.
    model_path = os.path.join(out, args.model_filename)
    logging.info('Saving the trained models to {}...'.format(model_path))
    classifier.save_pickle(model_path, protocol=args.protocol)
                    optimizer.setup(model)

                    train_iter.reset()
                    test_iter.reset()

                    if mlp_config['context'] == 'gpu':
                        updater = training.StandardUpdater(train_iter,
                                                           optimizer,
                                                           device=0)
                    elif mlp_config['context'] == 'multi-gpu':
                        assert mlp_config['gpus'] > 1
                        device_dict = {'main': 0}
                        for i in range(mlp_config['gpus'] - 1):
                            device_dict["gpu_{}".format(i + 1)] = i + 1
                        updater = training.ParallelUpdater(train_iter,
                                                           optimizer,
                                                           devices=device_dict)
                    else:
                        updater = training.StandardUpdater(
                            train_iter, optimizer)

                    trainer = training.Trainer(updater,
                                               (mlp_config['epochs'], 'epoch'),
                                               out='result')
                    if mlp_config['context'] == 'gpu':
                        trainer.extend(
                            extensions.Evaluator(test_iter, model, device=0))
                    elif mlp_config['context'] == 'multi-gpu':
                        trainer.extend(
                            extensions.Evaluator(test_iter, model, device=0))
                    else:
                        trainer.extend(
                            extensions.Evaluator(test_iter, model))
def main():
    parser = argparse.ArgumentParser(description='Chainer YOLOv3 VOC Train')
    parser.add_argument('--batchsize', '-b', type=int, default=8)
    parser.add_argument('--iteration', '-i', type=int, default=50200)
    parser.add_argument('--gpus', '-g', type=int, nargs='*', default=[])
    parser.add_argument('--out', '-o', default='yolov3-voc-result')
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--display_interval', type=int, default=100)
    parser.add_argument('--snapshot_interval', type=int, default=100)
    parser.add_argument('--ignore_thresh', type=float, default=0.5)
    parser.add_argument('--thresh', type=float, default=0.4)
    parser.add_argument('--darknet', default='')
    parser.add_argument('--validation_size', type=int, default=32)
    args = parser.parse_args()

    print('GPUs: {}'.format(args.gpus))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# iteration: {}'.format(args.iteration))
    print('')
    
    random.seed(args.seed)
    np.random.seed(args.seed)
    
    base = None
    if len(args.darknet) > 0:
        darknet53 = Darknet53(20)
        serializers.load_npz(args.darknet, darknet53)
        base = darknet53.base
    yolov3 = YOLOv3(20, base, ignore_thresh=args.ignore_thresh)
    model = YOLOv3Loss(yolov3)
    device = -1
    if len(args.gpus) > 0:
        device = args.gpus[0]
        cuda.cupy.random.seed(args.seed)
        cuda.get_device_from_id(args.gpus[0]).use()
    if len(args.gpus) == 1:
        model.to_gpu()
    
    optimizer = chainer.optimizers.MomentumSGD(lr=0.001)
    optimizer.setup(model)
    optimizer.add_hook(optimizer_hooks.WeightDecay(0.0005), 'hook_decay')
    optimizer.add_hook(optimizer_hooks.GradientClipping(10.0), 'hook_grad_clip')
    
    
    train = VOCBboxDataset(split='train')
    test = VOCBboxDataset(split='val')
    train = YOLOVOCDataset(train, classifier=False, jitter=0.3,
                        hue=0.1, sat=1.5, val=1.5)
    #train = train[np.arange(args.batchsize)]
    test = YOLOVOCDataset(test, classifier=False)
    test = test[np.random.permutation(np.arange(len(test)))[:min(args.validation_size, len(test))]]
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    if len(args.gpus) <= 1:
        updater = training.StandardUpdater(
            train_iter, optimizer, converter=concat_yolo, device=device)
    else:
        devices = {'main': args.gpus[0]}
        for gpu in args.gpus[1:]:
            devices['gpu{}'.format(gpu)] = gpu
        updater = training.ParallelUpdater(
            train_iter, optimizer, converter=concat_yolo, devices=devices)
    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), out=args.out)
    
    display_interval = (args.display_interval, 'iteration')
    snapshot_interval = (args.snapshot_interval, 'iteration')
    
    trainer.extend(extensions.Evaluator(
        test_iter, model, converter=concat_yolo, 
        device=device), trigger=display_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.LogReport(trigger=display_interval))
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss', 'validation/main/loss'], 'iteration',
                display_interval, file_name='loss.png'))
    
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 
         'main/loss', 'validation/main/loss', 'elapsed_time']),
                  trigger=display_interval)
    trainer.extend(extensions.ProgressBar(update_interval=1))
    
    trainer.extend(extensions.snapshot_object(
        yolov3, 'yolov3_snapshot.npz'), 
        trigger=training.triggers.MinValueTrigger(
            'validation/main/loss', snapshot_interval))
    trainer.extend(extensions.snapshot_object(
        yolov3, 'yolov3_final.npz'), 
        trigger=snapshot_interval)
    
    trainer.extend(DarknetShift(
        optimizer, 'steps', args.iteration, burn_in=1000,
        steps=[args.iteration-10200,args.iteration-5200], scales=[0.1,0.1]
    ))
    trainer.extend(CropSizeUpdater(train, 
                                   [(10+i)*32 for i in range(0,5)],
                                   args.iteration - 200))
    
    detector = YOLOv3Predictor(yolov3, thresh=args.thresh)
    class_names = load_list('./data/voc.names')
    trainer.extend(YOLODetection(
        detector, 
        ['./data/image/dog.jpg'],
        class_names, size=(416, 416), thresh=args.thresh,
        trigger=display_interval, device=device
    ))
    
    trainer.run()
Example #15
def main():
    parser = argparse.ArgumentParser(description='Chainer Darknet53 Train')
    parser.add_argument('--batchsize', '-b', type=int, default=8)
    parser.add_argument('--iteration', '-i', type=int, default=100000)
    parser.add_argument('--gpus', '-g', type=int, nargs='*', default=[])
    parser.add_argument('--out', '-o', default='darknet53-voc-result')
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--display_interval', type=int, default=100)
    parser.add_argument('--snapshot_interval', type=int, default=100)
    parser.add_argument('--validation_size', type=int, default=2048)
    args = parser.parse_args()

    print('GPUs: {}'.format(args.gpus))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# iteration: {}'.format(args.iteration))
    print('')

    random.seed(args.seed)
    np.random.seed(args.seed)

    darknet53 = Darknet53(20)
    model = L.Classifier(darknet53)
    device = -1
    if len(args.gpus) > 0:
        device = args.gpus[0]
        cuda.cupy.random.seed(args.seed)
        cuda.get_device_from_id(args.gpus[0]).use()
    if len(args.gpus) == 1:
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=0.001)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(0.0005),
                       'hook_decay')

    train = VOCBboxDataset(split='train')
    test = VOCBboxDataset(split='val')
    train = YOLOVOCDataset(train,
                           classifier=True,
                           jitter=0.2,
                           hue=0.1,
                           sat=.75,
                           val=.75)
    test = YOLOVOCDataset(test, classifier=True, crop_size=(256, 256))
    test = test[np.random.permutation(np.arange(
        len(test)))[:min(args.validation_size, len(test))]]

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    if len(args.gpus) <= 1:
        updater = training.StandardUpdater(train_iter,
                                           optimizer,
                                           device=device)
    else:
        devices = {'main': args.gpus[0]}
        for gpu in args.gpus[1:]:
            devices['gpu{}'.format(gpu)] = gpu
        updater = training.ParallelUpdater(train_iter,
                                           optimizer,
                                           devices=devices)

    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               out=args.out)

    display_interval = (args.display_interval, 'iteration')
    snapshot_interval = (args.snapshot_interval, 'iteration')

    trainer.extend(extensions.Evaluator(test_iter, model, device=device),
                   trigger=display_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.LogReport(trigger=display_interval))
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'iteration',
                                  display_interval,
                                  file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'iteration',
                display_interval,
                file_name='accuracy.png'))

    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'elapsed_time'
    ]),
                   trigger=display_interval)
    trainer.extend(extensions.ProgressBar(update_interval=5))
    trainer.extend(extensions.snapshot_object(darknet53,
                                              'darknet53_snapshot.npz'),
                   trigger=training.triggers.MinValueTrigger(
                       'validation/main/loss', snapshot_interval))
    trainer.extend(extensions.snapshot_object(darknet53,
                                              'darknet53_final.npz'),
                   trigger=snapshot_interval)

    trainer.extend(DarknetShift(optimizer, 'poly', args.iteration))

    trainer.extend(CropSizeUpdater(train,
                                   [(4 + i) * 32 for i in range(0, 11)]))

    trainer.run()
Example #16
def main():
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset',
                        '-d',
                        default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--same_batch',
                        '-s',
                        action='store_true',
                        help='if set and using multiple GPUs, use batchsize * gpu_num')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu_num',
                        '-gn',
                        type=int,
                        default=1,
                        help='number of GPUs to use (negative value indicates CPU)')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=0,
                        help='main GPU ID (negative value indicates CPU)')
    parser.add_argument('--model',
                        '-m',
                        default='allconvnet',
                        help='choose training model')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the training from snapshot')

    args = parser.parse_args()
    print('# number of GPUs in use: {}'.format(args.gpu_num))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))

    # make dump name with this experiment
    dump_dir = './result/train_log' + '_gpu_num-' + str(
        args.gpu_num) + "_model-" + str(args.model) + '_epoch-' + str(
            args.epoch) + '_batchsize-' + str(
                args.batchsize) + '_dataset-' + str(args.dataset)

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('# Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('# Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')
    if args.model == 'resnet':
        print('# cnn_model: resnet')
        model = L.Classifier(ResNet(class_labels=class_labels))
    elif args.model == 'allconvnet':
        print('# cnn_model: AllConvNetBN')
        model = L.Classifier(AllConvNetBN(class_labels))
    else:
        raise RuntimeError('Invalid model choice.')

    if args.gpu >= 0 and args.gpu_num >= 1:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current

    #optimizer = chainer.optimizers.MomentumSGD(0.01)
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    #optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    # In a multi-GPU setup (data-parallel training with ParallelUpdater), each
    # GPU processes batchsize / gpu_num samples, so optionally scale the total
    # batch size by the number of GPUs.
    batchsize = args.batchsize * args.gpu_num if args.same_batch else args.batchsize
    train_iter = chainer.iterators.SerialIterator(train, batchsize)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 batchsize,
                                                 repeat=False,
                                                 shuffle=False)
    # Set up a trainer
    if args.gpu_num <= 1:
        print("# main gpu: ", args.gpu)
        if args.gpu >= 0:
            model.to_gpu()  # Copy the model to the GPU
        updater = training.StandardUpdater(train_iter,
                                           optimizer,
                                           device=args.gpu)
    elif args.gpu_num >= 2:
        _devices = {'main': args.gpu}
        print("# main gpu: ", args.gpu)
        for g_idx in range(1, args.gpu_num):
            _devices[str(g_idx)] = g_idx
        print("# using gpus: ", _devices)
        updater = training.ParallelUpdater(
            train_iter,
            optimizer,
            devices=_devices,
        )
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=dump_dir)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(TestModeEvaluator(test_iter, model, device=args.gpu))

    # Halve the learning rate (Adam's alpha) every 20 epochs.
    trainer.extend(extensions.ExponentialShift('alpha', 0.5),
                   trigger=(20, 'epoch'))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot at each epoch
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        print('Resume from a snapshot')
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
Example #17
def main():
    parser = argparse.ArgumentParser()
    pa = parser.add_argument

    pa('--gpu', type=str, default='0')
    pa('--epoch', type=int, default=20)
    pa('--debug_mode', action='store_true')
    pa('--resume', type=str, default='')

    pa('--data_dir', type=str, default='../bi-att-flow/data/squad_nonsplit')
    pa('--ckpt_path', type=str, default='logs')
    pa('--log_path', type=str, default='logs')

    pa('--batch_size', type=int, default=60)
    pa('--display_step', type=int, default=50)
    pa('--eval_step', type=int, default=500)

    pa('--init_lr', type=float, default=0.5)
    pa('--optimizer', type=str, default='adadelta')
    pa('--decay_rate', type=float, default=0.999)
    pa('--dropout_rate', type=float, default=0.2)
    pa('--no_ema', action='store_true')

    pa('--hidden_size', type=int, default=100)
    pa('--word_emb_dim', type=int, default=100)
    pa('--char_emb_dim', type=int, default=8)
    pa('--char_conv_n_kernel', type=int, default=100)
    pa('--char_conv_height', type=int, default=5)
    pa('--char_out_dim', type=int, default=100)

    pa('--highway_n_layer', type=int, default=2)

    pa('--word_count_th', type=int, default=10)
    pa('--char_count_th', type=int, default=50)
    pa('--sent_size_th', type=int, default=195)  # 400
    pa('--para_size_th', type=int, default=256)
    pa('--num_sents_th', type=int, default=8)
    pa('--ques_size_th', type=int, default=30)
    pa('--word_size_th', type=int, default=16)

    config = parser.parse_args()
    print(json.dumps(config.__dict__, indent=4))

    train_data, test_data, vocab = load_dataset(config)
    config = update_config(config, [train_data, test_data], vocab)

    config.gpu = [int(g) for g in config.gpu.split(',')]
    config.enc_dim = config.word_emb_dim + config.char_out_dim

    model = BiDAF(config)
    if config.resume:
        serializers.load_npz(config.resume, model)

    # optimizer
    if config.optimizer == 'adam':
        optimizer = chainer.optimizers.Adam(0.001)
    else:
        optimizer = AdaDeltaWithLearningRate(lr=config.init_lr, eps=1e-08)
    optimizer.setup(model)
    model.word_emb.W.update_rule.enabled = False

    # iterator
    train_iter = MultiprocessIterator(train_data,
                                      config.batch_size,
                                      repeat=True,
                                      shuffle=True)
    test_iter = MultiprocessIterator(test_data,
                                     config.batch_size,
                                     repeat=False,
                                     shuffle=False)

    # updater, trainer
    if len(config.gpu) == 2:
        multi_devices = {
            'main': int(config.gpu[0]),
            'second': int(config.gpu[1])
        }
        updater = training.ParallelUpdater(train_iter,
                                           optimizer,
                                           converter=squad_converter,
                                           devices=multi_devices)
    elif len(config.gpu) == 1:
        if config.gpu[0] >= 0:
            model.to_gpu(config.gpu[0])
        updater = training.StandardUpdater(train_iter,
                                           optimizer,
                                           converter=squad_converter,
                                           device=config.gpu[0])
    trainer = training.Trainer(updater, (config.epoch, 'epoch'),
                               out=config.log_path)

    evaluator = BiDAFEvaluator(test_iter,
                               model,
                               config,
                               converter=squad_converter,
                               device=config.gpu[0])
    evaluator.name = 'val'

    iter_per_epoch = len(train_data) // config.batch_size
    print('Iter/epoch =', iter_per_epoch)

    log_trigger = (min(config.display_step, iter_per_epoch // 2), 'iteration')
    eval_trigger = (config.eval_step,
                    'iteration') if iter_per_epoch > config.eval_step else (
                        1, 'epoch')
    record_trigger = training.triggers.MaxValueTrigger('val/main/f1',
                                                       eval_trigger)

    trainer.extend(extensions.snapshot_object(
        model, 'model_epoch_{.updater.epoch}.npz'),
                   trigger=record_trigger)
    trainer.extend(evaluator, trigger=eval_trigger)
    trainer.extend(
        extensions.LogReport(trigger=log_trigger, log_name='iteration.log'))
    trainer.extend(
        extensions.LogReport(trigger=eval_trigger, log_name='epoch.log'))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'iteration', 'main/loss', 'main/match', 'main/f1',
            'val/main/loss', 'val/main/match', 'val/main/f1', 'elapsed_time'
        ]))

    trainer.run()
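
# Added note (not part of the original script): a minimal, self-contained
# sketch of the "keep only the best model" pattern used above, where
# MaxValueTrigger fires whenever the watched validation metric reaches a new
# maximum. The helper name and its default arguments are illustrative, not
# taken from the original code.
def extend_with_best_model_snapshot(trainer, model, key='val/main/f1',
                                    eval_trigger=(1, 'epoch')):
    from chainer.training import extensions, triggers

    best_trigger = triggers.MaxValueTrigger(key, trigger=eval_trigger)
    # snapshot_object writes the model file only when the metric improves
    trainer.extend(extensions.snapshot_object(model, 'best_model.npz'),
                   trigger=best_trigger)
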
def main():
    # This script is almost identical to train_mnist.py. The only difference is
    # that this script uses data-parallel computation on two GPUs.
    # See train_mnist.py for more details.
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=400,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu0',
                        '-g',
                        type=int,
                        default=0,
                        help='First GPU ID')
    parser.add_argument('--gpu1',
                        '-G',
                        type=int,
                        default=1,
                        help='Second GPU ID')
    parser.add_argument('--out',
                        '-o',
                        default='result_parallel',
                        help='Directory to output the result')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit',
                        '-u',
                        type=int,
                        default=1000,
                        help='Number of units')
    args = parser.parse_args()

    print('GPU: {}, {}'.format(args.gpu0, args.gpu1))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    chainer.cuda.get_device(args.gpu0).use()

    model = L.Classifier(train_mnist.MLP(args.unit, 10))
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    train, test = chainer.datasets.get_mnist()
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    # ParallelUpdater implements the data-parallel gradient computation on
    # multiple GPUs. It accepts "devices" argument that specifies which GPU to
    # use.
    updater = training.ParallelUpdater(
        train_iter,
        optimizer,
        # The device of the name 'main' is used as a "master", while others are
        # used as slaves. Names other than 'main' are arbitrary.
        devices={
            'main': args.gpu0,
            'second': args.gpu1
        },
    )
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu0))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
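
# Added note (not part of the original script): the example above hard-codes
# two GPU flags. A minimal sketch for building the ParallelUpdater `devices`
# dict from a comma-separated GPU list instead; the key 'main' is required,
# the remaining key names are arbitrary. The helper name is illustrative.
def make_devices(gpus_csv):
    gpus = [int(g) for g in gpus_csv.split(',')]
    devices = {'main': gpus[0]}
    for gid in gpus[1:]:
        devices['gpu{}'.format(gid)] = gid
    return devices

# e.g. updater = training.ParallelUpdater(train_iter, optimizer,
#                                         devices=make_devices('0,1'))
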
Exemple #19
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--config_path", type=str, default="config.ini")
    parser.add_argument("--resume")
    args = parser.parse_args()

    config = configparser.ConfigParser()
    config.read(args.config_path, "UTF-8")
    train_param = utils.get_config(config)

    chainer.global_config.autotune = True
    chainer.cuda.set_max_workspace_size(11388608)
    chainer.config.cudnn_fast_batch_normalization = True
    logger.info("> set up devices")
    if chainer.backends.cuda.available:
        devices = utils.setup_devices(train_param["gpus"])
    else:
        # cpu run
        devices = {"main": -1}
    logger.info("> set devices {}".format(devices))
    utils.set_random_seed(devices, train_param["seed"])

    # get dataset
    logger.info("> get dataset")
    train, test = select_dataset(config, return_data=["train_set", "val_set"])
    logger.info("> size of train {}".format(len(train)))
    logger.info("> size of test {}".format(len(test)))
    # create result dir and copy file
    result = config["output_path"]["result_dir"]
    logger.info("> store file to result dir {}".format(result))
    utils.create_result_dir(result)
    destination = os.path.join(result, "detector")
    logger.info("> store config.ini to {}".format(
        os.path.join(destination, "config.ini")))
    if not os.path.exists(destination):
        os.makedirs(destination)
    shutil.copy(args.config_path, os.path.join(destination, "config.ini"))
    # load model
    logger.info("> load model")
    model = utils.create_ssd_model(train_param)

    model.use_preset("evaluate")
    train_chain = MultiboxTrainChain(model, beta=4)

    logger.info("> transform dataset")

    train = TransformDataset(
        train, Transform(model.coder, model.insize, model.mean, train=True))
    train_iter = chainer.iterators.MultiprocessIterator(
        train,
        train_param["batchsize"],
        n_processes=train_param["num_process"])

    test = TransformDataset(
        test, Transform(model.coder, model.insize, model.mean, train=False))
    test_iter = chainer.iterators.MultiprocessIterator(
        test,
        train_param["batchsize"],
        repeat=False,
        shuffle=False,
        n_processes=4)

    # the initial lr comes from train_param["learning_rate"]; the
    # ExponentialShift extension registered below decays it by 0.1 on the
    # configured schedule
    logger.info("> set up optimizer")
    optimizer = chainer.optimizers.MomentumSGD(lr=train_param["learning_rate"])
    # optimizer = chainer.optimizers.RMSprop(lr=train_param["learning_rate"])
    optimizer.setup(train_chain)
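    # per-parameter update-rule hooks: double the gradients of bias terms and
    # apply weight decay to every other parameter (as in common SSD training
    # setups)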
    for param in train_chain.params():
        if param.name == "b":
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.ParallelUpdater(train_iter, optimizer, devices=devices)
    trainer = training.Trainer(
        updater,
        (train_param["train_iter"], "iteration"),
        destination,
    )
    trainer.extend(
        extensions.ExponentialShift("lr",
                                    0.1,
                                    init=train_param["learning_rate"]),
        trigger=triggers.ManualScheduleTrigger(train_param["schedule"],
                                               "iteration"))

    # set current device to devices["main"]
    # with chainer.cuda.Device(devices["main"]):
    eval_interval = 500, "iteration"
    logger.info("setup evaluator {}".format(train_param["hand_class"]))
    trainer.extend(
        DetectionCOCOEvaluator(
            test_iter,
            model,
            device=devices["main"],
            label_names=train_param["hand_class"],
        ),
        trigger=eval_interval,
    )

    log_interval = 100, "iteration"
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        "epoch",
        "iteration",
        "lr",
        "main/loss",
        "main/loss/loc",
        "main/loss/conf",
        "validation/main/map",
    ]),
                   trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=100))

    trainer.extend(extensions.snapshot(filename="best_snapshot"),
                   trigger=MaxValueTrigger("validation/main/map",
                                           trigger=eval_interval))
    trainer.extend(extensions.snapshot_object(model, filename="bestmodel.npz"),
                   trigger=MaxValueTrigger("validation/main/map",
                                           trigger=eval_interval))

    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(["main/loss", "validation/main/loss"],
                                  x_key="iteration",
                                  file_name="loss.png"))
        trainer.extend(
            extensions.PlotReport(["main/accuracy/map", "validation/main/map"],
                                  x_key="iteration",
                                  file_name="accuracy.png"))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    logger.info("> run trainer")
    trainer.run()
Exemple #20
def main():
    '''
    main function, entry point
    '''
    # command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate',
                        '-l',
                        type=float,
                        default=0.001,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=100,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu0',
                        '-g',
                        type=int,
                        default=0,
                        help='First GPU ID (negative value indicates CPU)')
    parser.add_argument('--gpu1',
                        '-G',
                        type=int,
                        default=2,
                        help='Second GPU ID (negative value indicates CPU)')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--iter_parallel',
                        '-p',
                        action='store_true',
                        default=False,
                        help='use multiprocess iterators that load the dataset from disk')
    parser.add_argument('--opt',
                        '-o',
                        type=str,
                        choices=('adam', 'sgd'),
                        default='adam')
    parser.add_argument('--fsize', '-f', type=int, default=5)
    parser.add_argument('--ch', '-c', type=int, default=4)
    args = parser.parse_args()

    # print training parameters
    print("-=Learning Parameter=-")
    print("# Max Epochs: {}".format(args.epoch))
    print("# Batch Size: {}".format(args.batchsize))
    print("# Learning Rate: {}".format(args.learnrate))
    print("# Optimizer Method: {}".format(args.opt))
    print("# Filter Size: {}".format(args.fsize))
    print("# Channel Scale: {}".format(args.ch))
    print('# Train Dataset: General 100')
    if args.iter_parallel:
        print("# Data iterators load in parallel")
    print("\n")

    # save directory
    model_dir_name = 'AEFINet_concat_parallel_opt_{}_ch_{}_fsize_{}'.format(
        args.opt, args.ch, args.fsize)
    outdir = path.join(ROOT_PATH, 'results', 'FI', 'AEFINet', model_dir_name)
    if not path.exists(outdir):
        os.makedirs(outdir)
    with open(path.join(outdir, 'arg_param.txt'), 'w') as f:
        for k, v in args.__dict__.items():
            f.write('{}:{}\n'.format(k, v))

    # loading dataset
    print('# loading dataset (General100_train, General100_test) ...')
    if args.iter_parallel:
        train = ds.SequenceDataset(dataset='train')
        test = ds.SequenceDataset(dataset='test')
    else:
        train = ds.SequenceDatasetOnMem(dataset='train')
        test = ds.SequenceDatasetOnMem(dataset='test')

    chainer.cuda.get_device_from_id(args.gpu0).use()

    # prepare model
    model = N.AEFINetConcat(f_size=args.fsize, ch=args.ch)
    # model.to_gpu()

    # setup optimizer
    if args.opt == 'adam':
        optimizer = chainer.optimizers.Adam(alpha=args.learnrate)
    elif args.opt == 'sgd':
        optimizer = chainer.optimizers.MomentumSGD(lr=args.learnrate,
                                                   momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001))

    # setup iter
    if args.iter_parallel:
        train_iter = chainer.iterators.MultiprocessIterator(train,
                                                            args.batchsize,
                                                            n_processes=8)
        test_iter = chainer.iterators.MultiprocessIterator(test,
                                                           args.batchsize,
                                                           repeat=False,
                                                           shuffle=False,
                                                           n_processes=8)
    else:
        train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
        test_iter = chainer.iterators.SerialIterator(test,
                                                     args.batchsize,
                                                     repeat=False,
                                                     shuffle=False)

    # setup trainer
    updater = training.ParallelUpdater(
        train_iter,
        optimizer,
        devices={
            'main': args.gpu0,
            'second': args.gpu1
        },
    )
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=outdir)

    # evaluate on test data
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu0))
    # dump loss graph
    trainer.extend(extensions.dump_graph('main/loss'))
    # lr shift
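    # MomentumSGD exposes its step size as 'lr' while Adam exposes it as
    # 'alpha', so the hyperparameter name passed to ExponentialShift depends
    # on the chosen optimizer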
    if args.opt == 'sgd':
        trainer.extend(extensions.ExponentialShift("lr", 0.1),
                       trigger=(100, 'epoch'))
    elif args.opt == 'adam':
        trainer.extend(extensions.ExponentialShift("alpha", 0.1),
                       trigger=(100, 'epoch'))
    # save snapshot
    trainer.extend(extensions.snapshot(), trigger=(10, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_snapshot_{.updater.epoch}'),
                   trigger=(10, 'epoch'))
    # log report
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.observe_lr(), trigger=(1, 'epoch'))
    #  plot loss graph
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              'epoch',
                              file_name='loss.png'))
    # plot acc graph
    trainer.extend(
        extensions.PlotReport(['main/PSNR', 'validation/main/PSNR'],
                              'epoch',
                              file_name='PSNR.png'))
    # print info
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/PSNR',
            'validation/main/PSNR', 'lr', 'elapsed_time'
        ]))
    # print progbar
    trainer.extend(extensions.ProgressBar())

    # [ChainerUI] enable to send commands from ChainerUI
    trainer.extend(CommandsExtension())
    # [ChainerUI] save 'args' to show experimental conditions
    save_args(args, outdir)

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()

    # save final model
    model_outdir = path.join(ROOT_PATH, 'models', model_dir_name)
    if not path.exists(model_outdir):
        os.makedirs(model_outdir)
    model_name = 'AEFINet_concat_opt_{}_ch_{}_fsize_{}.npz'.format(
        args.opt, args.ch, args.fsize)
    chainer.serializers.save_npz(path.join(model_outdir, model_name), model)

    model_parameter = {
        'name': 'AEFINetConcat',
        'parameter': {
            'f_size': args.fsize,
            'ch': args.ch
        }
    }
    with open(path.join(model_outdir, 'model_parameter.json'), 'w') as f:
        json.dump(model_parameter, f)
Exemple #21
        args.translate, args.translate_range, args.min_dim,
        args.coord_normalize, args.gcn, args.n_joints, args.fname_index,
        args.joint_index, args.symmetric_joints, args.ignore_label)

    train_iter = iterators.MultiprocessIterator(train_dataset, args.batchsize)
    test_iter = iterators.MultiprocessIterator(test_dataset,
                                               args.batchsize,
                                               repeat=False,
                                               shuffle=False)

    gpus = [int(i) for i in args.gpus.split(',')]
    devices = {'main': gpus[0]}
    if len(gpus) > 1:
        for gid in gpus[1:]:
            devices.update({'gpu{}'.format(gid): gid})
    updater = training.ParallelUpdater(train_iter, opt, devices=devices)

    interval = (args.snapshot, 'epoch')
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=result_dir)
    trainer.extend(extensions.dump_graph('main/loss'))

    # Save parameters and optimization state
    trainer.extend(extensions.snapshot_object(model,
                                              'epoch-{.updater.epoch}.model'),
                   trigger=interval)
    trainer.extend(extensions.snapshot_object(opt,
                                              'epoch-{.updater.epoch}.state'),
                   trigger=interval)
    trainer.extend(extensions.snapshot(), trigger=interval)

    if args.opt == 'MomentumSGD' or args.opt == 'AdaGrad':
Exemple #22
def main(args):

    assert((args.depth - args.block - 1) % args.block == 0)
    # integer division; the assert above guarantees exact divisibility
    n_layer = (args.depth - args.block - 1) // args.block
    
    if args.dataset == 'cifar10':
        mean = numpy.asarray((125.3, 123.0, 113.9))  # from fb.resnet.torch
        # was the std computed from zero-padded images?
        std = numpy.asarray((63.0, 62.1, 66.7))
        train, test = dataset.EXget_cifar10(scale=255, mean=mean, std=std)
        n_class = 10
    elif args.dataset == 'cifar100':
        mean = numpy.asarray((129.3, 124.1, 112.4))  # from fb.resnet.torch
        std = numpy.asarray((68.2, 65.4, 70.4))
        train, test = dataset.EXget_cifar100(scale=255, mean=mean, std=std)
        n_class = 100
    elif args.dataset == 'SVHN':
        raise NotImplementedError()

    train = PreprocessedDataset(train, random=True)
    test = PreprocessedDataset(test)

    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)
    test_iter = chainer.iterators.MultiprocessIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    model = chainer.links.Classifier(
        DenseNet(n_layer, args.growth_rate, n_class, args.drop_ratio, 16,
                 args.block))
    if args.init_model:
        serializers.load_npz(args.init_model, model)

    import EXoptimizers
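    # ParallelUpdater sums the gradients computed on each device into the main
    # model, so the base learning rate is divided by the number of GPUs here
    # to keep the effective step size comparable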
    optimizer = EXoptimizers.originalNesterovAG(lr=args.lr / len(args.gpus), momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))

    devices = {'main': args.gpus[0]}
    if len(args.gpus) > 1:
        for gid in args.gpus[1:]:
            devices['gpu%d' % gid] = gid
    updater = training.ParallelUpdater(train_iter, optimizer, devices=devices)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.dir)

    val_interval = (1, 'epoch')
    log_interval = (1, 'epoch')

    def lr_shift():  # DenseNet specific!
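        # called through observe_value with trigger=(1, 'epoch'): applies the
        # DenseNet schedule (multiply lr by 0.1 at epochs 151 and 226) and
        # returns the current lr so it is logged under the key 'lr'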
        if updater.epoch == 151 or updater.epoch == 226:
            optimizer.lr *= 0.1
        return optimizer.lr

    trainer.extend(Evaluator(
        test_iter, model, device=args.gpus[0]), trigger=val_interval)
    trainer.extend(extensions.observe_value(
        'lr', lambda _: lr_shift()), trigger=(1, 'epoch'))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot_object(
        model, 'epoch_{.updater.epoch}.model'), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        optimizer, 'epoch_{.updater.epoch}.state'), trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    start_time = time.time()
    trainer.extend(extensions.observe_value(
        'time', lambda _: time.time() - start_time), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'time', 'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'lr',
    ]), trigger=log_interval)
    trainer.extend(extensions.observe_value(
        'graph', lambda _: create_fig(args.dir)), trigger=(2, 'epoch'))
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
Exemple #23
def main(args):

    assert ((args.depth - args.block - 1) % args.block == 0)
    n_layer = (args.depth - args.block - 1) // args.block
    if args.dataset == 'cifar10':
        train, test = cifar.get_cifar10()
        n_class = 10
    elif args.dataset == 'cifar100':
        train, test = cifar.get_cifar100()
        n_class = 100
    elif args.dataset == 'SVHN':
        raise NotImplementedError()

    mean = numpy.zeros((3, 32, 32), dtype=numpy.float32)
    for image, _ in train:
        mean += image / len(train)

    train = PreprocessedDataset(train, mean, random=True)
    test = PreprocessedDataset(test, mean)

    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)
    test_iter = chainer.iterators.MultiprocessIterator(test,
                                                       args.batchsize,
                                                       repeat=False,
                                                       shuffle=False)

    model = chainer.links.Classifier(
        DenseNet(n_layer, args.growth_rate, n_class, args.drop_ratio, 16,
                 args.block))
    if args.init_model:
        serializers.load_npz(args.init_model, model)

    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr / len(args.gpus),
                                               momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))

    devices = {'main': args.gpus[0]}
    if len(args.gpus) > 1:
        for gid in args.gpus[1:]:
            devices['gpu%d' % gid] = gid
    updater = training.ParallelUpdater(train_iter, optimizer, devices=devices)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.dir)

    val_interval = (1, 'epoch')
    log_interval = (1, 'epoch')

    eval_model = model.copy()
    eval_model.train = False

    trainer.extend(extensions.Evaluator(test_iter,
                                        eval_model,
                                        device=args.gpus[0]),
                   trigger=val_interval)
    trainer.extend(extensions.ExponentialShift('lr', args.lr_decay_ratio),
                   trigger=(args.lr_decay_freq, 'epoch'))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot_object(model,
                                              'epoch_{.updater.epoch}.model'),
                   trigger=val_interval)
    trainer.extend(extensions.snapshot_object(optimizer,
                                              'epoch_{.updater.epoch}.state'),
                   trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    start_time = time.time()
    trainer.extend(extensions.observe_value(
        'time', lambda _: time.time() - start_time),
                   trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'time',
        'epoch',
        'iteration',
        'main/loss',
        'validation/main/loss',
        'main/accuracy',
        'validation/main/accuracy',
        'lr',
    ]),
                   trigger=log_interval)
    trainer.extend(extensions.observe_value('graph',
                                            lambda _: create_fig(args.dir)),
                   trigger=(2, 'epoch'))
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
Exemple #24
    batchsize = 100
    train_iter = iterators.SerialIterator(train_data, batchsize)
    test_iter = iterators.SerialIterator(test_data, batchsize, repeat=False, shuffle=False)
#    import pdb; pdb.set_trace()

    # setup model
    model = LSTM(IN_UNITS, HIDDEN_UNITS, OUT_UNITS)
 
    # setup optimizer
    optimizer = optimizers.Adam()
    optimizer.setup(model)
 
    start = time.time()
    
#    updater = training.StandardUpdater(train_iter, optimizer, MyConverter)
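    # negative device ids mean CPU in Chainer, so this devices dict keeps both
    # model copies on the CPU instead of on GPUs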
    updater = training.ParallelUpdater(train_iter, optimizer, MyConverter, devices={'main': -1, 'second': -2})
    trainer = training.Trainer(updater, (20, 'epoch'), out='result')
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.observe_lr())
    trainer.extend(extensions.Evaluator(test_iter, model, MyConverter), name='val')
    trainer.extend(extensions.PrintReport(['epoch', 'main/loss', 'val/main/loss', 'elapsed_time', 'lr']))
    trainer.extend(extensions.PlotReport(['main/loss', 'val/main/loss'], x_key='epoch', file_name='loss.png'))
    trainer.extend(extensions.ProgressBar())
    trainer.run()

    end = time.time()
 
    print("{}[sec]".format(end - start))

Exemple #25
def main():
    FLAGS(sys.argv)

    # 0. load dataset
    char_list = [
        line.strip().split('\t')[0] for line in open(FLAGS.vocab_file)
    ]
    char_to_id = char2id = {c: i for i, c in enumerate(char_list)}

    h5f = h5py.File(path.normpath(FLAGS.data_file), 'r')
    data = h5f['data'][:]
    train_data = data
    print_len = len(train_data[0])
    h5f.close()

    n, max_len = data.shape
    charset_size = len(char_list) + 1

    save_dir = path.normpath(FLAGS.save_dir)

    # 1. build model
    if FLAGS.model == 'rnnlm':
        model = Decoder(charset_size=charset_size,
                        hidden_size=FLAGS.hidden_size,
                        n_layers=FLAGS.n_layers,
                        dropout=FLAGS.dropout)

    gpu_id_list = ([int(g) for g in FLAGS.gpu_id_list.split(',')]
                   if len(FLAGS.gpu_id_list) > 0 else [])

    if len(gpu_id_list) > 0:
        chainer.cuda.get_device_from_id(gpu_id_list[0]).use()
        model.to_gpu()

    load_model = path.join(save_dir, FLAGS.load_model)
    if os.path.exists(load_model):
        print('load model snapshot from %s' % load_model)
        serializers.load_npz(load_model, model)

    from lv import ZiFeature, calc_mask_5
    zf = ZiFeature()

    def func_mask(ys):
        return calc_mask_5(zf=zf,
                           prefix=ys,
                           char_list=char_list,
                           char2id=char2id,
                           offset=1)

    if FLAGS.demo_mode:
        print('demo starts. enter a prefix, or `exit` to quit.')
        while True:
            line = sys.stdin.readline().strip()
            if line == 'exit':
                break
            guide_ids = [
                char_to_id[c] + 1 for c in line.strip() if c in char_to_id
            ]
            for t in [1., 1.5, 2., 2.5, 3.]:
                ys = model.sample(batch_size=5,
                                  use_random=True,
                                  temperature=t,
                                  max_len=print_len,
                                  guide_ids=guide_ids,
                                  func_mask=func_mask)
                for y in ys:
                    print('[t=%.3f] %s' % (t, gs(y, char_list)))
            print('-' * print_len)

        return

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    train_iter = chainer.iterators.SerialIterator(train_data, FLAGS.batch_size)

    if len(gpu_id_list) == 0:
        updater = training.StandardUpdater(train_iter, optimizer, device=-1)
    elif len(gpu_id_list) == 1:
        gpu_id = gpu_id_list[0]
        updater = training.StandardUpdater(train_iter,
                                           optimizer,
                                           device=gpu_id)
    else:
        devices = {('main' if index == 0 else ('second%d' % gpu_id)): gpu_id
                   for index, gpu_id in enumerate(gpu_id_list)}
        print('multiple gpu training with devices = %s' % devices)
        updater = training.ParallelUpdater(
            train_iter,
            optimizer,
            devices=devices,
        )
    trainer = training.Trainer(updater,
                               stop_trigger=(FLAGS.n_epoch, 'epoch'),
                               out=save_dir)

    trainer.extend(
        extensions.LogReport(trigger=(FLAGS.log_interval, 'iteration')))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'main/loss', 'main/perp', 'elapsed_time']),
                   trigger=(FLAGS.log_interval, 'iteration'))
    trainer.extend(
        extensions.snapshot(
            filename='trainer_snapshot_iter_{.updater.iteration}'))
    trainer.extend(
        extensions.snapshot(filename='trainer_snapshot_iter_latest'))
    trainer.extend(
        extensions.snapshot_object(
            target=model, filename='model_snapshot_iter_{.updater.iteration}'))
    trainer.extend(
        extensions.snapshot_object(target=model,
                                   filename='model_snapshot_iter_latest'))
    trainer.extend(extensions.ProgressBar())

    if FLAGS.show_sample:
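        # chainer.training.make_extension turns the sample() function below
        # into a trainer extension; it is registered with
        # trainer.extend(sample, trigger=(1, 'epoch')) to print generated
        # samples once per epoch at several temperatures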

        @chainer.training.make_extension()
        def sample(trainer):
            for temperature in [1.0, 1.3, 1.6, 1.9, 2.1]:
                print('sample (use random, t=%.2f):' % temperature)
                ys = model.sample(batch_size=2,
                                  use_random=True,
                                  temperature=temperature,
                                  max_len=print_len,
                                  func_mask=func_mask)
                for y in ys:
                    print('%s' % (gs(y, char_list)))
                    print('-' * print_len)
            print('sample (use max):')
            ys = model.sample(batch_size=1,
                              use_random=False,
                              func_mask=func_mask)
            for y in ys:
                print('%s' % (gs(y, char_list)))
                print('-' * print_len)

        trainer.extend(sample, trigger=(1, 'epoch'))

    load_trainer = path.join(save_dir, FLAGS.load_trainer)
    if os.path.exists(load_trainer):
        print('load trainer snapshot from %s' % load_trainer)
        serializers.load_npz(load_trainer, trainer)

    print('start training')
    trainer.run()