def do_training(self, model, x_train, y_train):
    """Train `model` on (x_train, y_train) with MomentumSGD.

    x_train/y_train are iterated pairwise, one minibatch per element.
    When params["nb_gpus"] == 1 the batch arrays are moved to the GPU
    given by params["gpus"][0].

    NOTE(review): everything after the bare `return` below is an
    alternative (currently unreachable) path that trains through
    Chainer's Trainer/iterator machinery; kept verbatim for reference.
    """
    params = self.params
    # optimizer = chainer.optimizers.SGD()
    if params["nb_gpus"] == 1:
        import cupy
        id_device = params["gpus"][0]
        cupy.cuda.Device(id_device).use()
    optimizer = chainer.optimizers.MomentumSGD(lr=0.001, momentum=0.95)
    optimizer.setup(model)
    for id_epoch in range(self.params["nb_epoch"]):
        print("epoch ", id_epoch)
        for data, target in zip(x_train, y_train):
            if self.params["nb_gpus"] == 1:
                # TODO: option for on-core training
                data = cupy.array(data)
                target = cupy.array(target)
            pred = model.predictor(data)
            loss = F.softmax_cross_entropy(pred, target)
            # BUG FIX: gradients must be cleared before backward() and the
            # optimizer must be stepped afterwards; previously gradients
            # accumulated across minibatches and weights were never updated.
            model.cleargrads()
            loss.backward()
            optimizer.update()
    return
    # ----- unreachable reference path: Chainer's native iterators -----
    x_train = x_train.reshape(
        (x_train.shape[0] * x_train.shape[1], ) + x_train.shape[2:])
    y_train = y_train.reshape((y_train.shape[0] * y_train.shape[1], ))
    train = chainer.datasets.tuple_dataset.TupleDataset(x_train, y_train)
    if params["nb_gpus"] == 0:
        train_iter = chainer.iterators.SerialIterator(
            train, batch_size=params["batch_size"], repeat=True, shuffle=False)
    else:
        train_iter = chainer.iterators.MultiprocessIterator(
            train, batch_size=params["batch_size"], repeat=True, shuffle=True,
            n_processes=4)
    if params["nb_gpus"] == 0:
        updater = training.StandardUpdater(train_iter, optimizer)
    else:
        if params["nb_gpus"] == 1:
            updater = training.StandardUpdater(
                train_iter, optimizer, device=id_device)
        else:
            # 'main' is the master device; other entry names are arbitrary.
            dic_devices = {str(i): i for i in params["gpus"][1:]}
            dic_devices["main"] = params["gpus"][0]
            updater = training.ParallelUpdater(
                train_iter, optimizer, devices=dic_devices)
    trainer = training.Trainer(
        updater, (self.params["nb_epoch"], 'epoch'), out='/tmp/result')
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport(
            ['epoch', 'main/loss', 'main/accuracy', "elapsed_time"]))
    trainer.run()
def prepare_updater(self, train_it, optimizer):
    """Build the updater selected by ``config.training_params.updater_type``.

    Accepts both 'standard' and the historical misspelling 'standerd'
    for backward compatibility with existing config files.

    Raises:
        ValueError: for an unknown updater type (previously the method
        fell through and silently returned None).
    """
    updater_type = config.training_params.updater_type
    if updater_type in ('standard', 'standerd'):
        return training.StandardUpdater(
            train_it, optimizer, device=config.gpu)
    if updater_type == 'parallel':
        # NOTE(review): device ids are hard-coded with 'main' on GPU 1 —
        # confirm this matches the intended device layout.
        return training.ParallelUpdater(
            train_it, optimizer, devices={'main': 1, 'second': 0})
    raise ValueError("unknown updater_type: {!r}".format(updater_type))
def main():
    """Train a VGGNet classifier on the preprocessed dataset over GPUs 0-3."""
    model = VGGNet()
    model.train = True
    model = Classifier(model)
    # FIX: removed four unused deep copies of the model (model0..model3)
    # that were moved to GPUs 0-3 and never referenced again;
    # ParallelUpdater creates its own per-device replicas, so the manual
    # copies only wasted time and GPU memory.
    ds = np.load(
        "/home/kaunildhruv/fbsource/fbcode/experimental/themachinist/ml/autoencoders/preprocess_ds.npz"
    )
    print("Dataset loaded.")
    # NOTE(review): the archive keys are spelled "train_lable"/"test_lable" —
    # presumably a typo baked into the preprocessing step; keep in sync.
    train, test = tuple_dataset.TupleDataset(
        ds["train_img"], ds["train_lable"]), tuple_dataset.TupleDataset(
            ds["test_img"], ds["test_lable"])
    # `bs` and `epochs` are module-level settings defined elsewhere in the file.
    train_iter = iterators.SerialIterator(train, batch_size=bs, shuffle=True)
    test_iter = iterators.SerialIterator(test, batch_size=bs, shuffle=False,
                                         repeat=False)
    optimizer = optimizers.Adam(alpha=0.001, beta1=0.9, beta2=0.999, eps=1e-08)
    optimizer.setup(model)
    # 'main' is the master device; the other entry names are arbitrary.
    updater = training.ParallelUpdater(train_iter, optimizer, devices={
        'main': 0,
        'first': 1,
        'second': 2,
        'third': 3
    })
    trainer = training.Trainer(updater, (epochs, 'epoch'), out='result')
    trainer.extend(extensions.LogReport())
    interval = (5, 'epoch')
    iter_interval = (10000, 'iteration')
    # Periodic model-only snapshots plus a full trainer snapshot for resuming.
    trainer.extend(extensions.snapshot_object(model,
                                              'epoch-{.updater.epoch}.model'),
                   trigger=interval)
    trainer.extend(extensions.snapshot_object(
        model, 'iteration-{.updater.iteration}.model'),
        trigger=iter_interval)
    trainer.extend(extensions.snapshot(), trigger=interval)
    trainer.extend(extensions.PrintReport(['epoch', 'main/loss']))
    trainer.extend(extensions.ProgressBar())
    trainer.run()
def create_updater(train_iter, optimizer, devices):
    """Choose an updater for the given device mapping.

    Multi-device training uses MultiprocessParallelUpdater when NCCL is
    available, otherwise ParallelUpdater (scaling the learning rate down
    by the device count); a single device gets a plain StandardUpdater.
    """
    n_devices = len(devices)
    if n_devices > 1 and HAVE_NCCL:
        return training.updaters.MultiprocessParallelUpdater(
            train_iter, optimizer, devices=devices)
    if n_devices > 1:
        optimizer.lr /= n_devices
        return training.ParallelUpdater(
            train_iter, optimizer, devices=devices)
    return training.StandardUpdater(
        train_iter, optimizer, device=devices['main'])
def run_training(
        net, train, valid, result_dir, batchsize=64, devices=-1,
        training_epoch=300, initial_lr=0.05, lr_decay_rate=0.5,
        lr_decay_epoch=30, weight_decay=0.0005):
    """Train `net` on `train` with periodic validation on `valid`.

    Args:
        net: chainer Link; wrapped in an L.Classifier internally.
        train / valid: training and validation datasets.
        result_dir: Trainer output directory.
        devices: a single device id (int, -1 for CPU) or a dict mapping
            updater device names to ids ('main' key required).

    Returns:
        The trained L.Classifier wrapping `net`.
    """
    # Iterator
    train_iter = iterators.MultiprocessIterator(train, batchsize)
    test_iter = iterators.MultiprocessIterator(valid, batchsize, False, False)
    # Model
    net = L.Classifier(net)
    # Optimizer
    optimizer = optimizers.MomentumSGD(lr=initial_lr)
    optimizer.setup(net)
    if weight_decay > 0:
        optimizer.add_hook(chainer.optimizer.WeightDecay(weight_decay))
    # Updater
    # BUG FIX: the original did `devices['main'] = devices` on an int,
    # which raises TypeError (ints don't support item assignment).
    # Normalize the int to a {'main': id} mapping so devices['main'] is
    # valid for the Evaluator below.
    if isinstance(devices, int):
        devices = {'main': devices}
        updater = training.StandardUpdater(
            train_iter, optimizer, device=devices['main'])
    elif isinstance(devices, dict):
        updater = training.ParallelUpdater(
            train_iter, optimizer, devices=devices)
    # 6. Trainer
    trainer = training.Trainer(
        updater, (training_epoch, 'epoch'), out=result_dir)
    # 7. Trainer extensions
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.observe_lr())
    trainer.extend(extensions.Evaluator(
        test_iter, net, device=devices['main']), name='val')
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'main/accuracy', 'val/main/loss',
         'val/main/accuracy', 'elapsed_time', 'lr']))
    trainer.extend(extensions.PlotReport(
        ['main/loss', 'val/main/loss'], x_key='epoch', file_name='loss.png'))
    trainer.extend(extensions.PlotReport(
        ['main/accuracy', 'val/main/accuracy'], x_key='epoch',
        file_name='accuracy.png'))
    # Decay the learning rate by lr_decay_rate every lr_decay_epoch epochs.
    trainer.extend(extensions.ExponentialShift(
        'lr', lr_decay_rate), trigger=(lr_decay_epoch, 'epoch'))
    trainer.extend(extensions.snapshot_object(net.predictor,
                                              'model_{.updater.epoch}.npz'),
                   trigger=(10, 'epoch'))
    trainer.run()
    return net
def train(self, epoch_num=40, batch_size=128, gpu=-1):
    """Fine-tune the classifier on the labeled image dataset.

    Runs `epoch_num` epochs on the devices in `self.gpu_devices`,
    evaluating each epoch on the validation split, then saves the
    trained model (moved back to CPU) to "mymodel.npz".
    """
    # Datasets: labels come from info.txt in each split directory; each
    # sample goes through the instance's preprocessing transform.
    raw_train = chainer.datasets.LabeledImageDataset(
        "../dataset/train/info.txt", "../dataset/train")
    raw_test = chainer.datasets.LabeledImageDataset(
        "../dataset/validation/info.txt", "../dataset/validation")
    train_data = TransformDataset(raw_train, self.transform)
    test_data = TransformDataset(raw_test, self.transform)

    # Classifier wrapper uses the default softmax_cross_entropy loss.
    model = L.Classifier(Model(out_size=25))
    optimizer = optimizers.Adam(alpha=1e-4)
    optimizer.setup(model)
    # Keep the pretrained VGG16 backbone frozen during fine-tuning.
    model.predictor.vgg.disable_update()

    train_iter = chainer.iterators.SerialIterator(train_data, batch_size)
    test_iter = chainer.iterators.SerialIterator(
        test_data, batch_size, repeat=False, shuffle=False)

    updater = training.ParallelUpdater(
        train_iter, optimizer, devices=self.gpu_devices)
    trainer = training.Trainer(updater, (epoch_num, 'epoch'), out='result')
    trainer.extend(extensions.Evaluator(
        test_iter, model, device=self.gpu_devices['main']))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport([
        'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
        'validation/main/accuracy', 'elapsed_time'
    ]))
    trainer.run()

    model.to_cpu()
    serializers.save_npz("mymodel.npz", model)
def updater_creator(iterator, optimizer, devices, **kwargs):
    """A sample updater creator.

    An updater creator method should return an Updater object. Once an
    updater creator method is specified in the config YAML, the method
    receives the iterator object, the optimizer object, the device
    dictionary, and the "args" dictionary defined in the config YAML.
    You can build a custom Updater from those objects and return it.
    """
    multi_device = len(devices) > 1
    if multi_device:
        if HAVE_NCCL:
            return training.updaters.MultiprocessParallelUpdater(
                iterator, optimizer, devices=devices)
        # Compensate for gradient accumulation across devices.
        optimizer.lr /= len(devices)
        return training.ParallelUpdater(iterator, optimizer, devices=devices)
    return training.StandardUpdater(
        iterator, optimizer, device=devices['main'])
def main():
    """Train an image classifier (Food-101 style pipeline) with Chainer.

    Loads a model class dynamically from --model_file/--model_name, trains
    it with MomentumSGD and a ParallelUpdater over the devices returned by
    utils.setup_devices, and keeps the checkpoint with the lowest
    validation loss.
    """
    args = parse_args()
    chainer.global_config.autotune = True
    #chainer.set_debug(True)
    # Set the random seeds
    random.seed(args.seed)
    np.random.seed(args.seed)
    # Set up Devices
    devices = utils.setup_devices(args.gpus)
    # Load model: derive a module path from the file path, import it, and
    # instantiate the named class with the output class count.
    ext = os.path.splitext(args.model_file)[1]
    model_path = '.'.join(os.path.split(args.model_file)).replace(ext, '')
    model = import_module(model_path)
    model = getattr(model, args.model_name)(args.output_class)
    #model = L.Classifier(model)
    model.to_gpu()
    # Create result dir and archive the model file plus the CLI arguments
    # for reproducibility.
    result_dir = create_result_dir(args.model_name)
    shutil.copy(args.model_file,
                os.path.join(result_dir, os.path.basename(args.model_file)))
    with open(os.path.join(result_dir, 'args'), 'w') as fp:
        fp.write(json.dumps(vars(args)))
    print(json.dumps(vars(args), sort_keys=True, indent=4))
    # Create Dataset: load the datasets and mean file, then attach the
    # train/valid transforms (augmentation only on the training split).
    mean = np.load(args.mean)
    train = ImagenetDataset(args.train_list, args.train_image)
    valid = ImagenetDataset(args.val_list, args.val_image)
    train_transform = partial(transform.food101_transform, mean=mean,
                              random_angle=args.random_angle,
                              expand_ratio=args.expand_ratio,
                              crop_size=args.crop_size, train=True)
    valid_transform = partial(transform.food101_transform, mean=mean,
                              train=False)
    train = TransformDataset(train, train_transform)
    valid = TransformDataset(valid, valid_transform)
    # Create Iterator
    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize,
                                                        n_processes=4)
    val_iter = chainer.iterators.MultiprocessIterator(valid, args.batchsize,
                                                      shuffle=False,
                                                      repeat=False,
                                                      n_processes=4)
    #train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    #val_iter = chainer.iterators.SerialIterator(valid, args.batchsize, repeat=False, shuffle=False)
    # Set Optimizer
    optimizer = optimizers.MomentumSGD(lr=args.initial_lr, momentum=0.9)
    optimizer.setup(model)
    if args.weight_decay > 0:
        optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))
    # optimizer.use_fp32_update()
    # Updater — always ParallelUpdater, driven by the `devices` dict
    # (works for a single device too when the dict has only 'main').
    updater = training.ParallelUpdater(train_iter, optimizer, devices=devices)
    # Trainer
    trainer = training.Trainer(updater, (args.training_epoch, 'epoch'),
                               result_dir)
    # Trainer Extensions
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.observe_lr())
    trainer.extend(extensions.Evaluator(val_iter, model,
                                        device=devices['main']), name='val')
    # Step-decay of the learning rate every lr_decay_epoch epochs.
    trainer.extend(extensions.ExponentialShift('lr', args.lr_decay_rate),
                   trigger=(args.lr_decay_epoch, 'epoch'))
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'main/accuracy', 'val/main/loss',
            'val/main/accuracy', 'elapsed_time', 'lr'
        ]))
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'val/main/loss'],
                                  x_key='epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(['main/accuracy', 'val/main/accuracy'],
                                  x_key='epoch', file_name='accuracy.png'))
    # Print progress bar
    trainer.extend(extensions.ProgressBar())
    # Save the model which minimizes validation loss
    trainer.extend(extensions.snapshot_object(model, filename='bestmodel.npz'),
                   trigger=training.triggers.MinValueTrigger('val/main/loss'))
    trainer.run()
def main():
    """Train the hand-detection network and dump sample heatmaps.

    Trains HandDetect wrapped in WeightedLoss, saves the predictor to
    <out_dir>/hand.model, then writes 5 heatmap channels per training
    sample to <out_dir>/generate/.
    """
    global args
    args = parse()
    args.guassian = True  # (spelling kept — get_dataset reads this attribute)
    print('Guassian state:'+str(args.guassian))
    # Timestamp the output directory (drop sub-second precision).
    currentDT = datetime.datetime.now()
    args.out_dir += str(currentDT).split('.')[0]
    use_gpu = args.gpus[0] >= 0
    if len(args.gpus) > 1:
        # NOTE(review): 'gpu{1}' is a literal key (braces never formatted) —
        # presumably meant 'gpu1'. Harmless as ParallelUpdater non-main key
        # names are arbitrary, BUT 'main' is assigned args.gpus[1] while the
        # Evaluator and heatmap generation below hard-code device 0 —
        # confirm the intended main device is args.gpus[0].
        gpus = {'main': args.gpus[1], 'gpu{1}': args.gpus[0]}
        args.gpus = gpus
    # Dataset
    train, test = get_dataset(args)
    # NOTE(review): mean/std are computed but never used afterwards.
    mean = np.mean([x for x, _ in train], axis=(0, 2, 3))
    std = np.std([x for x, _ in train], axis=(0, 2, 3))
    # Iterators
    train_iter = ch.iterators.MultiprocessIterator(train, args.batchsize)
    test_iter = ch.iterators.MultiprocessIterator(test, args.batchsize,
                                                  False, False)
    # Net
    net = HandDetect.HandDetect()
    net = WeightedLoss.WeightedLoss(net, 0.01, use_gpu)
    # Optimizer
    optimizer = optimizers.MomentumSGD(lr=args.lr, momentum=args.momentum)
    optimizer.setup(net)
    if args.weight_decay > 0:
        optimizer.add_hook(ch.optimizer.WeightDecay(args.weight_decay))
    # Updater: dict => multi-GPU ParallelUpdater, otherwise single device.
    if isinstance(args.gpus, dict):
        updater = training.ParallelUpdater(train_iter, optimizer,
                                           devices=args.gpus)
    else:
        updater = training.StandardUpdater(train_iter, optimizer,
                                           device=args.gpus[0])
    # Trainer
    trainer = training.Trainer(updater, (args.training_epoch, 'epoch'),
                               out=args.out_dir)
    # Training extensions
    trainer.extend(extensions.Evaluator(test_iter, net, device=0))
    #trainer.extend(extensions.snapshot(), trigger=(20, 'epoch'))
    trainer.extend(extensions.LogReport())
    print("The PlotReport is " + str(extensions.PlotReport.available()))
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'], 'epoch',
                file_name='accuracy.png'))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss', 'elapsed_time']))
    # Start the training process and save the model.
    print("Start Training")
    trainer.run()
    serializers.save_npz(args.out_dir+'/hand.model', net.predictor)
    # Generate some heatmaps to judge the result.
    os.mkdir(args.out_dir + '/generate')
    for j, (hand, _) in enumerate(train):
        # assumes each sample is a 3x224x224 image — TODO confirm
        hand = cuda.to_gpu(hand.reshape((1, 3, 224, 224)), device=0)
        HM = net.predictor(hand)
        for i in range(5):
            t = cuda.to_cpu(HM[0, i, ...].data)
            cv2.imwrite(args.out_dir + '/generate/%d_%d.png' % (j, i), t * 255)
optimizer.setup(model) # Load the MNIST dataset train_iter = chainer.iterators.SerialIterator(train, args.batch_size) test_iter = chainer.iterators.SerialIterator(test, args.batch_size, repeat=False, shuffle=False) # Set up a trainer device = 0 if num_gpus > 0 else -1 # -1 indicates CPU, 0 indicates first GPU device. if num_gpus > 0: updater = training.ParallelUpdater( train_iter, optimizer, # The device of the name 'main' is used as a "master", while others are # used as slaves. Names other than 'main' are arbitrary. devices={('main' if device == 0 else str(device)): device for device in range(num_gpus)}) else: updater = training.StandardUpdater(train_iter, optimizer, device=device) # Write output files to output_data_dir. These are zipped and uploaded to S3 output path as output.tar.gz. trainer = training.Trainer(updater, (args.epochs, 'epoch'), out=args.output_dir) # Evaluate the model with the test dataset for each epoch trainer.extend(extensions.Evaluator(test_iter, model, device=device))
def main():
    """Train YOLOv3 on a custom dataset, optionally from a Darknet53 base.

    Validation, periodic detection previews, and best/backup/final model
    snapshots are all wired up via Trainer extensions. Multi-GPU training
    uses a ParallelUpdater over --gpus.
    """
    parser = argparse.ArgumentParser(description='Chainer YOLOv3 Train')
    parser.add_argument('--names')
    parser.add_argument('--train')
    parser.add_argument('--valid', default='')
    parser.add_argument('--detection', default='')
    parser.add_argument('--batchsize', '-b', type=int, default=8)
    parser.add_argument('--iteration', '-i', type=int, default=50200)
    parser.add_argument('--gpus', '-g', type=int, nargs='*', default=[])
    parser.add_argument('--out', '-o', default='yolov3-result')
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--display_interval', type=int, default=100)
    parser.add_argument('--snapshot_interval', type=int, default=100)
    parser.add_argument('--ignore_thresh', type=float, default=0.5)
    parser.add_argument('--thresh', type=float, default=0.5)
    parser.add_argument('--darknet', default='')
    parser.add_argument('--darknet_class', type=int, default=-1)
    # Negative steps are interpreted below as offsets from --iteration.
    parser.add_argument('--steps', type=int, nargs='*', default=[-10200, -5200])
    parser.add_argument('--scales', type=float, nargs='*', default=[0.1, 0.1])
    args = parser.parse_args()
    print('GPUs: {}'.format(args.gpus))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# iteration: {}'.format(args.iteration))
    class_names = load_list(args.names)
    random.seed(args.seed)
    np.random.seed(args.seed)
    # Optionally warm-start the backbone from a pretrained Darknet53.
    base = None
    if len(args.darknet) > 0:
        darknet_class = args.darknet_class if args.darknet_class > 0 \
            else len(class_names)
        darknet53 = Darknet53(darknet_class)
        serializers.load_npz(args.darknet, darknet53)
        base = darknet53.base
    yolov3 = YOLOv3(len(class_names), base, ignore_thresh=args.ignore_thresh)
    model = YOLOv3Loss(yolov3)
    device = -1
    if len(args.gpus) > 0:
        device = args.gpus[0]
        cuda.cupy.random.seed(args.seed)
        cuda.get_device_from_id(args.gpus[0]).use()
        if len(args.gpus) == 1:
            # Multi-GPU replication is handled by ParallelUpdater below.
            model.to_gpu()
    optimizer = chainer.optimizers.MomentumSGD(lr=0.001)
    optimizer.setup(model)
    optimizer.add_hook(optimizer_hooks.WeightDecay(0.0005), 'hook_decay')
    optimizer.add_hook(optimizer_hooks.GradientClipping(10.0),
                       'hook_grad_clip')
    train = YOLODataset(args.train, train=True, classifier=False,
                        jitter=0.3, hue=0.1, sat=1.5, val=1.5)
    #train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    # shared_mem sized for a 448x448x3 image plus up to 100 (label+box) rows.
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, shared_mem=(448**2*3+(1+4)*100)*4)
    if len(args.gpus) <= 1:
        updater = training.StandardUpdater(
            train_iter, optimizer, converter=concat_yolo, device=device)
    else:
        devices = {'main': args.gpus[0]}
        for gpu in args.gpus[1:]:
            devices['gpu{}'.format(gpu)] = gpu
        updater = training.ParallelUpdater(
            train_iter, optimizer, converter=concat_yolo, devices=devices)
    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), out=args.out)
    display_interval = (args.display_interval, 'iteration')
    snapshot_interval = (args.snapshot_interval, 'iteration')
    # Without a validation set the best snapshot tracks training loss;
    # with one, it tracks validation loss instead.
    print_entries = ['epoch', 'iteration', 'main/loss', 'elapsed_time']
    plot_keys = ['main/loss']
    snapshot_key = 'main/loss'
    if len(args.valid) > 0:
        print_entries = ['epoch', 'iteration', 'main/loss',
                         'validation/main/loss', 'elapsed_time']
        plot_keys = ['main/loss', 'validation/main/loss']
        snapshot_key = 'validation/main/loss'
        test = YOLODataset(args.valid, train=False, classifier=False)
        test_iter = chainer.iterators.SerialIterator(
            test, args.batchsize, repeat=False, shuffle=False)
        trainer.extend(extensions.Evaluator(
            test_iter, model, converter=concat_yolo, device=device),
            trigger=display_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.LogReport(trigger=display_interval))
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                plot_keys, 'iteration',
                display_interval, file_name='loss.png'))
    trainer.extend(extensions.PrintReport(print_entries),
                   trigger=display_interval)
    trainer.extend(extensions.ProgressBar(update_interval=1))
    # Best (min snapshot_key), rolling backup, and end-of-training snapshots.
    trainer.extend(extensions.snapshot_object(
        yolov3, 'yolov3_snapshot.npz'),
        trigger=training.triggers.MinValueTrigger(
            snapshot_key, snapshot_interval))
    trainer.extend(extensions.snapshot_object(
        yolov3, 'yolov3_backup.npz'), trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        yolov3, 'yolov3_final.npz'), trigger=(args.iteration, 'iteration'))
    # Resolve negative step values as offsets from the final iteration.
    steps = args.steps
    for i in range(len(steps)):
        if steps[i] < 0:
            steps[i] = args.iteration + steps[i]
    scales = args.scales
    print('# steps: {}'.format(steps))
    print('# scales: {}'.format(scales))
    trainer.extend(DarknetShift(
        optimizer, 'steps', args.iteration, burn_in=1000,
        steps=steps, scales=scales
    ))
    # Cycle the crop size through 320..448 (multiples of 32).
    trainer.extend(CropSizeUpdater(train,
                                   [(10+i)*32 for i in range(0, 5)],
                                   args.iteration - 200))
    if len(args.detection):
        detector = YOLOv3Predictor(yolov3, thresh=args.thresh)
        trainer.extend(YOLODetection(
            detector,
            load_list(args.detection),
            class_names, (416, 416), args.thresh,
            trigger=display_interval, device=device
        ))
    print('')
    print('RUN')
    print('')
    trainer.run()
def main():
    """Train a molecule-pair classifier (binary, sigmoid cross-entropy).

    Parses paired-SMILES CSVs, builds a predictor via set_up_predictor,
    and trains with Adam under an early-stopping trigger, reporting
    accuracy / ROC-AUC / PRC-AUC / F1 on both splits each epoch.
    """
    # Parse the arguments.
    args = parse_arguments()
    # String-valued CLI flags: anything other than the literal 'False'
    # counts as enabled.
    augment = False if args.augment == 'False' else True
    multi_gpu = False if args.multi_gpu == 'False' else True
    if args.label:
        labels = args.label
        class_num = len(labels) if isinstance(labels, list) else 1
    else:
        raise ValueError('No target label was specified.')

    # Dataset preparation. Postprocessing is required for the regression task.
    def postprocess_label(label_list):
        # Labels are cast to int32 for the classification loss.
        label_arr = np.asarray(label_list, dtype=np.int32)
        return label_arr

    # Apply a preprocessor to the dataset.
    logging.info('Preprocess train dataset and test dataset...')
    preprocessor = preprocess_method_dict[args.method]()
    parser = CSVFileParserForPair(preprocessor,
                                  postprocess_label=postprocess_label,
                                  labels=labels,
                                  smiles_cols=['smiles_1', 'smiles_2'])
    train = parser.parse(args.train_datafile)['dataset']
    valid = parser.parse(args.valid_datafile)['dataset']
    if augment:
        logging.info('Utilizing data augmentation in train set')
        train = augment_dataset(train)
    num_train = train.get_datasets()[0].shape[0]
    num_valid = valid.get_datasets()[0].shape[0]
    logging.info('Train/test split: {}/{}'.format(num_train, num_valid))
    # Hidden layer sizes arrive as a comma-separated string.
    if len(args.net_hidden_dims):
        net_hidden_dims = tuple([int(net_hidden_dim) for net_hidden_dim
                                 in args.net_hidden_dims.split(',')])
    else:
        net_hidden_dims = ()
    fp_attention = True if args.fp_attention else False
    update_attention = True if args.update_attention else False
    weight_tying = False if args.weight_tying == 'False' else True
    attention_tying = False if args.attention_tying == 'False' else True
    fp_batch_normalization = True if args.fp_bn == 'True' else False
    layer_aggregator = None if args.layer_aggregator == '' \
        else args.layer_aggregator
    context = False if args.context == 'False' else True
    output_activation = functions.relu \
        if args.output_activation == 'relu' else None
    # NOTE(review): the keyword is spelled `weight_typing` — presumably a
    # typo mirrored in set_up_predictor's signature; keep in sync.
    predictor = set_up_predictor(method=args.method,
                                 fp_hidden_dim=args.fp_hidden_dim,
                                 fp_out_dim=args.fp_out_dim,
                                 conv_layers=args.conv_layers,
                                 concat_hidden=args.concat_hidden,
                                 layer_aggregator=layer_aggregator,
                                 fp_dropout_rate=args.fp_dropout_rate,
                                 fp_batch_normalization=fp_batch_normalization,
                                 net_hidden_dims=net_hidden_dims,
                                 class_num=class_num,
                                 sim_method=args.sim_method,
                                 fp_attention=fp_attention,
                                 weight_typing=weight_tying,
                                 attention_tying=attention_tying,
                                 update_attention=update_attention,
                                 context=context,
                                 context_layers=args.context_layers,
                                 context_dropout=args.context_dropout,
                                 message_function=args.message_function,
                                 readout_function=args.readout_function,
                                 num_timesteps=args.num_timesteps,
                                 num_output_hidden_layers=args.num_output_hidden_layers,
                                 output_hidden_dim=args.output_hidden_dim,
                                 output_activation=output_activation,
                                 symmetric=args.symmetric
                                 )
    train_iter = SerialIterator(train, args.batchsize)
    test_iter = SerialIterator(valid, args.batchsize,
                               repeat=False, shuffle=False)
    metrics_fun = {'accuracy': F.binary_accuracy}
    classifier = Classifier(predictor, lossfun=F.sigmoid_cross_entropy,
                            metrics_fun=metrics_fun, device=args.gpu)
    # Set up the optimizer.
    optimizer = optimizers.Adam(alpha=args.learning_rate,
                                weight_decay_rate=args.weight_decay_rate)
    # optimizer = optimizers.Adam()
    # optimizer = optimizers.SGD(lr=args.learning_rate)
    optimizer.setup(classifier)
    # Add regularization hooks (each enabled only when its rate is positive).
    if args.max_norm > 0:
        optimizer.add_hook(
            chainer.optimizer.GradientClipping(threshold=args.max_norm))
    if args.l2_rate > 0:
        optimizer.add_hook(chainer.optimizer.WeightDecay(rate=args.l2_rate))
    if args.l1_rate > 0:
        optimizer.add_hook(chainer.optimizer.Lasso(rate=args.l1_rate))
    # Set up the updater.
    if multi_gpu:
        # NOTE(review): multi-GPU hard-codes devices 0 and 1 and ignores
        # args.gpu — confirm this matches the deployment.
        logging.info('Using multiple GPUs')
        updater = training.ParallelUpdater(train_iter, optimizer,
                                           devices={'main': 0, 'second': 1},
                                           converter=concat_mols)
    else:
        logging.info('Using single GPU')
        updater = training.StandardUpdater(train_iter, optimizer,
                                           device=args.gpu,
                                           converter=concat_mols)
    # Set up the trainer.
    logging.info('Training...')
    # Early stopping on validation loss, capped at 500 epochs.
    early_stop = triggers.EarlyStoppingTrigger(
        monitor='validation/main/loss', patients=30,
        max_trigger=(500, 'epoch'))
    out = 'output' + '/' + args.out
    trainer = training.Trainer(updater, stop_trigger=early_stop, out=out)
    # trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(E.Evaluator(test_iter, classifier, device=args.gpu,
                               converter=concat_mols))
    # A non-repeating iterator over the train set for metric evaluation.
    train_eval_iter = SerialIterator(train, args.batchsize,
                                     repeat=False, shuffle=False)
    trainer.extend(AccuracyEvaluator(
        train_eval_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='train_acc',
        pos_labels=1, ignore_labels=-1, raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # instead extension name `val` is used.
    trainer.extend(AccuracyEvaluator(
        test_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='val_acc',
        pos_labels=1, ignore_labels=-1))
    trainer.extend(ROCAUCEvaluator(
        train_eval_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='train_roc',
        pos_labels=1, ignore_labels=-1, raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # instead extension name `val` is used.
    trainer.extend(ROCAUCEvaluator(
        test_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='val_roc',
        pos_labels=1, ignore_labels=-1))
    trainer.extend(PRCAUCEvaluator(
        train_eval_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='train_prc',
        pos_labels=1, ignore_labels=-1, raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # instead extension name `val` is used.
    trainer.extend(PRCAUCEvaluator(
        test_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='val_prc',
        pos_labels=1, ignore_labels=-1))
    trainer.extend(F1Evaluator(
        train_eval_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='train_f',
        pos_labels=1, ignore_labels=-1, raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # instead extension name `val` is used.
    trainer.extend(F1Evaluator(
        test_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='val_f',
        pos_labels=1, ignore_labels=-1))
    # Apply shift strategy to learning rate on a fixed epoch schedule.
    # trainer.extend(E.ExponentialShift('alpha', args.exp_shift_rate), trigger=(10, 'epoch'))
    if args.exp_shift_strategy == 1:
        trainer.extend(E.ExponentialShift('alpha', args.exp_shift_rate),
                       trigger=triggers.ManualScheduleTrigger(
                           [10, 20, 30, 40, 50, 60], 'epoch'))
    elif args.exp_shift_strategy == 2:
        trainer.extend(E.ExponentialShift('alpha', args.exp_shift_rate),
                       trigger=triggers.ManualScheduleTrigger(
                           [5, 10, 15, 20, 25, 30], 'epoch'))
    elif args.exp_shift_strategy == 3:
        trainer.extend(E.ExponentialShift('alpha', args.exp_shift_rate),
                       trigger=triggers.ManualScheduleTrigger(
                           [5, 10, 15, 20, 25, 30, 40, 50, 60, 70], 'epoch'))
    else:
        raise ValueError('No such strategy to adapt learning rate')
    # Observation of learning rate
    trainer.extend(E.observe_lr(), trigger=(1, 'iteration'))
    entries = [
        'epoch',
        'main/loss', 'train_acc/main/accuracy', 'train_roc/main/roc_auc',
        'train_prc/main/prc_auc',
        # 'train_p/main/precision', 'train_r/main/recall',
        'train_f/main/f1',
        'validation/main/loss', 'val_acc/main/accuracy',
        'val_roc/main/roc_auc', 'val_prc/main/prc_auc',
        # 'val_p/main/precision', 'val_r/main/recall',
        'val_f/main/f1',
        'lr', 'elapsed_time']
    trainer.extend(E.PrintReport(entries=entries))
    # Snapshot trigger changed from 10 to 2 epochs on Mar. 1 2019.
    trainer.extend(E.snapshot(), trigger=(2, 'epoch'))
    trainer.extend(E.LogReport())
    trainer.extend(E.ProgressBar())
    trainer.extend(E.PlotReport(['main/loss', 'validation/main/loss'],
                                'epoch', file_name='loss.png'))
    trainer.extend(E.PlotReport(
        ['train_acc/main/accuracy', 'val_acc/main/accuracy'],
        'epoch', file_name='accuracy.png'))
    if args.resume:
        resume_path = os.path.join(out, args.resume)
        logging.info(
            'Resume training according to snapshot in {}'.format(resume_path))
        chainer.serializers.load_npz(resume_path, trainer)
    trainer.run()
    # Save the regressor's parameters.
    model_path = os.path.join(out, args.model_filename)
    logging.info('Saving the trained models to {}...'.format(model_path))
    classifier.save_pickle(model_path, protocol=args.protocol)
# NOTE(review): fragment from the middle of a larger function; `model`,
# `optimizer`, the iterators, and `mlp_config` are defined outside this
# view, and the final `else:` branch continues past it.
optimizer.setup(model)
# Restart both iterators from the beginning of their datasets.
train_iter.reset()
test_iter.reset()
# Select the updater from the configured execution context:
# 'gpu' -> single GPU 0, 'multi-gpu' -> ParallelUpdater over
# mlp_config['gpus'] devices, anything else -> CPU.
if mlp_config['context'] == 'gpu':
    updater = training.StandardUpdater(train_iter, optimizer, device=0)
elif mlp_config['context'] == 'multi-gpu':
    assert mlp_config['gpus'] > 1
    # 'main' is device 0; the remaining devices get arbitrary gpu_N names.
    device_dict = {'main': 0}
    for i in range(mlp_config['gpus'] - 1):
        device_dict["gpu_{}".format(i + 1)] = i + 1
    updater = training.ParallelUpdater(train_iter, optimizer,
                                       devices=device_dict)
else:
    updater = training.StandardUpdater(
        train_iter, optimizer)
trainer = training.Trainer(updater, (mlp_config['epochs'], 'epoch'),
                           out='result')
# Evaluation always runs on device 0 for both GPU contexts.
if mlp_config['context'] == 'gpu':
    trainer.extend(
        extensions.Evaluator(test_iter, model, device=0))
elif mlp_config['context'] == 'multi-gpu':
    trainer.extend(
        extensions.Evaluator(test_iter, model, device=0))
else:
def main():
    """Train YOLOv3 on PASCAL VOC (20 classes).

    Optionally warm-starts from a pretrained Darknet53, supports
    multi-GPU via ParallelUpdater, and periodically runs a detection
    preview on a sample image.
    """
    parser = argparse.ArgumentParser(description='Chainer YOLOv3 VOC Train')
    parser.add_argument('--batchsize', '-b', type=int, default=8)
    parser.add_argument('--iteration', '-i', type=int, default=50200)
    parser.add_argument('--gpus', '-g', type=int, nargs='*', default=[])
    parser.add_argument('--out', '-o', default='yolov3-voc-result')
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--display_interval', type=int, default=100)
    parser.add_argument('--snapshot_interval', type=int, default=100)
    parser.add_argument('--ignore_thresh', type=float, default=0.5)
    parser.add_argument('--thresh', type=float, default=0.4)
    parser.add_argument('--darknet', default='')
    parser.add_argument('--validation_size', type=int, default=32)
    args = parser.parse_args()
    print('GPUs: {}'.format(args.gpus))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# iteration: {}'.format(args.iteration))
    print('')
    random.seed(args.seed)
    np.random.seed(args.seed)
    # Optionally warm-start the backbone from a pretrained Darknet53.
    base = None
    if len(args.darknet) > 0:
        darknet53 = Darknet53(20)
        serializers.load_npz(args.darknet, darknet53)
        base = darknet53.base
    yolov3 = YOLOv3(20, base, ignore_thresh=args.ignore_thresh)
    model = YOLOv3Loss(yolov3)
    device = -1
    if len(args.gpus) > 0:
        device = args.gpus[0]
        cuda.cupy.random.seed(args.seed)
        cuda.get_device_from_id(args.gpus[0]).use()
        if len(args.gpus) == 1:
            # Multi-GPU replication is handled by ParallelUpdater below.
            model.to_gpu()
    optimizer = chainer.optimizers.MomentumSGD(lr=0.001)
    optimizer.setup(model)
    optimizer.add_hook(optimizer_hooks.WeightDecay(0.0005), 'hook_decay')
    optimizer.add_hook(optimizer_hooks.GradientClipping(10.0),
                       'hook_grad_clip')
    train = VOCBboxDataset(split='train')
    test = VOCBboxDataset(split='val')
    # Wrap with YOLO-specific augmentation (train) / plain conversion (val).
    train = YOLOVOCDataset(train, classifier=False, jitter=0.3,
                           hue=0.1, sat=1.5, val=1.5)
    #train = train[np.arange(args.batchsize)]
    test = YOLOVOCDataset(test, classifier=False)
    # Evaluate on a random subset of at most validation_size images.
    test = test[np.random.permutation(np.arange(len(test)))[
        :min(args.validation_size, len(test))]]
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)
    if len(args.gpus) <= 1:
        updater = training.StandardUpdater(
            train_iter, optimizer, converter=concat_yolo, device=device)
    else:
        # 'main' is the master device; other entry names are arbitrary.
        devices = {'main': args.gpus[0]}
        for gpu in args.gpus[1:]:
            devices['gpu{}'.format(gpu)] = gpu
        updater = training.ParallelUpdater(
            train_iter, optimizer, converter=concat_yolo, devices=devices)
    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), out=args.out)
    display_interval = (args.display_interval, 'iteration')
    snapshot_interval = (args.snapshot_interval, 'iteration')
    trainer.extend(extensions.Evaluator(
        test_iter, model, converter=concat_yolo,
        device=device), trigger=display_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.LogReport(trigger=display_interval))
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss', 'validation/main/loss'], 'iteration',
                display_interval, file_name='loss.png'))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'main/loss', 'validation/main/loss',
         'elapsed_time']), trigger=display_interval)
    trainer.extend(extensions.ProgressBar(update_interval=1))
    # Best-so-far (min validation loss) and rolling final snapshots.
    trainer.extend(extensions.snapshot_object(
        yolov3, 'yolov3_snapshot.npz'),
        trigger=training.triggers.MinValueTrigger(
            'validation/main/loss', snapshot_interval))
    trainer.extend(extensions.snapshot_object(
        yolov3, 'yolov3_final.npz'), trigger=snapshot_interval)
    # Darknet-style LR schedule: burn-in then step decays near the end.
    trainer.extend(DarknetShift(
        optimizer, 'steps', args.iteration, burn_in=1000,
        steps=[args.iteration-10200, args.iteration-5200],
        scales=[0.1, 0.1]
    ))
    # Cycle the crop size through 320..448 (multiples of 32).
    trainer.extend(CropSizeUpdater(train,
                                   [(10+i)*32 for i in range(0, 5)],
                                   args.iteration - 200))
    # Periodic detection preview on a fixed sample image.
    detector = YOLOv3Predictor(yolov3, thresh=args.thresh)
    class_names = load_list('./data/voc.names')
    trainer.extend(YOLODetection(
        detector,
        ['./data/image/dog.jpg'],
        class_names, size=(416, 416), thresh=args.thresh,
        trigger=display_interval, device=device
    ))
    trainer.run()
def main():
    """Train Darknet53 on Pascal VOC (classification pre-training).

    Parses command-line options, builds the model, optimizer and
    datasets, then runs a Chainer Trainer for ``--iteration``
    iterations, writing logs, plots and snapshots to ``--out``.
    Single-GPU, multi-GPU (ParallelUpdater) and CPU runs are supported
    via ``--gpus``.
    """
    parser = argparse.ArgumentParser(description='Chainer Darknet53 Train')
    parser.add_argument('--batchsize', '-b', type=int, default=8)
    parser.add_argument('--iteration', '-i', type=int, default=100000)
    parser.add_argument('--gpus', '-g', type=int, nargs='*', default=[])
    parser.add_argument('--out', '-o', default='darknet53-voc-result')
    # type=int is required here: random.seed() tolerates a str, but
    # np.random.seed() and cuda.cupy.random.seed() raise on non-integers,
    # so a CLI-supplied seed used to crash the GPU path.
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--display_interval', type=int, default=100)
    parser.add_argument('--snapshot_interval', type=int, default=100)
    parser.add_argument('--validation_size', type=int, default=2048)
    args = parser.parse_args()

    print('GPUs: {}'.format(args.gpus))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# iteration: {}'.format(args.iteration))
    print('')

    # Fix all RNGs for reproducibility.
    random.seed(args.seed)
    np.random.seed(args.seed)

    darknet53 = Darknet53(20)  # 20 = number of VOC object classes
    model = L.Classifier(darknet53)

    device = -1  # -1 selects the CPU
    if len(args.gpus) > 0:
        device = args.gpus[0]
        cuda.cupy.random.seed(args.seed)
        cuda.get_device_from_id(args.gpus[0]).use()
        if len(args.gpus) == 1:
            # With multiple GPUs, ParallelUpdater copies the model to
            # each device itself.
            model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=0.001)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(0.0005),
                       'hook_decay')

    train = VOCBboxDataset(split='train')
    test = VOCBboxDataset(split='val')
    train = YOLOVOCDataset(train, classifier=True, jitter=0.2,
                           hue=0.1, sat=.75, val=.75)
    test = YOLOVOCDataset(test, classifier=True, crop_size=(256, 256))
    # Evaluate on a fixed random subset to keep validation affordable.
    test = test[np.random.permutation(np.arange(
        len(test)))[:min(args.validation_size, len(test))]]

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    if len(args.gpus) <= 1:
        updater = training.StandardUpdater(train_iter, optimizer,
                                           device=device)
    else:
        devices = {'main': args.gpus[0]}
        for gpu in args.gpus[1:]:
            devices['gpu{}'.format(gpu)] = gpu
        updater = training.ParallelUpdater(train_iter, optimizer,
                                           devices=devices)

    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               out=args.out)
    display_interval = (args.display_interval, 'iteration')
    snapshot_interval = (args.snapshot_interval, 'iteration')

    trainer.extend(extensions.Evaluator(test_iter, model, device=device),
                   trigger=display_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.LogReport(trigger=display_interval))
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'iteration', display_interval,
                                  file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'iteration', display_interval, file_name='accuracy.png'))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'elapsed_time'
    ]), trigger=display_interval)
    trainer.extend(extensions.ProgressBar(update_interval=5))
    # Keep the best model (lowest validation loss) plus the latest one.
    trainer.extend(
        extensions.snapshot_object(darknet53, 'darknet53_snapshot.npz'),
        trigger=training.triggers.MinValueTrigger(
            'validation/main/loss', snapshot_interval))
    trainer.extend(
        extensions.snapshot_object(darknet53, 'darknet53_final.npz'),
        trigger=snapshot_interval)
    trainer.extend(DarknetShift(optimizer, 'poly', args.iteration))
    trainer.extend(CropSizeUpdater(train,
                                   [(4 + i) * 32 for i in range(0, 11)]))

    trainer.run()
def main():
    """Train a CNN (ResNet or AllConvNetBN) on CIFAR-10/100.

    With ``--gpu_num >= 2`` each minibatch is spread over the GPUs by a
    ParallelUpdater; otherwise a StandardUpdater runs on ``--gpu``
    (or on the CPU when ``--gpu`` is negative).
    """
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    # NOTE: argparse's `type=bool` treats any non-empty string (even
    # "False") as True, so the value is parsed explicitly instead.
    parser.add_argument('--same_batch', '-s',
                        type=lambda s: s.lower() in ('true', '1', 'yes'),
                        default=False,
                        help='if True and use multi gpu, batchsize*gpu_num')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu_num', '-gn', type=int, default=1,
                        help='a number of GPU(negative value indicates CPU)')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='main GPU ID (negative value indicates CPU)')
    parser.add_argument('--model', '-m', default='allconvnet',
                        help='choose training model')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    args = parser.parse_args()

    print('# a number of using GPU: {}'.format(args.gpu_num))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))

    # Encode the experimental conditions into the log directory name.
    # (The historical '_datset-' spelling is kept so that existing result
    # paths remain valid.)
    dump_dir = './result/train_log' + '_gpu_num-' + str(
        args.gpu_num) + "_model-" + str(args.model) + '_epoch-' + str(
            args.epoch) + '_batchsize-' + str(
                args.batchsize) + '_datset-' + str(args.dataset)

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('# Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('# Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    if args.model == 'resnet':
        print('# cnn_model: resnet')
        model = L.Classifier(ResNet(class_labels=class_labels))
    elif args.model == 'allconvnet':
        print('# cnn_model: AllConvNetBN')
        model = L.Classifier(AllConvNetBN(class_labels))
    else:
        raise RuntimeError('Invalid dataset choice.')

    if args.gpu >= 0 and args.gpu_num >= 1:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current

    # optimizer = chainer.optimizers.MomentumSGD(0.01)
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    # optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    # With ParallelUpdater each device processes batchsize / gpu_num
    # samples, so optionally scale the total batch size up to keep the
    # per-device batch constant.
    batchsize = (args.batchsize * args.gpu_num
                 if args.same_batch else args.batchsize)

    train_iter = chainer.iterators.SerialIterator(train, batchsize)
    test_iter = chainer.iterators.SerialIterator(test, batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    if args.gpu_num <= 1:
        print("# main gpu: ", args.gpu)
        if args.gpu >= 0:
            # Only move the model when a GPU was actually requested; the
            # previous code called to_gpu() unconditionally and broke
            # CPU-only runs (negative --gpu).
            model.to_gpu()  # Copy the model to the GPU
        updater = training.StandardUpdater(train_iter, optimizer,
                                           device=args.gpu)
    else:
        _devices = {'main': args.gpu}
        print("# main gpu: ", args.gpu)
        for g_idx in range(1, args.gpu_num):
            _devices[str(g_idx)] = g_idx
        print("# using gpus: ", _devices)
        updater = training.ParallelUpdater(
            train_iter,
            optimizer,
            devices=_devices,
        )

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=dump_dir)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(TestModeEvaluator(test_iter, model, device=args.gpu))

    # Halve Adam's step size (alpha) every 20 epochs.
    trainer.extend(extensions.ExponentialShift('alpha', 0.5),
                   trigger=(20, 'epoch'))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot at each epoch
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout.
    # "main" refers to the target link of the "main" optimizer, and
    # "validation" to the default name of the Evaluator extension.
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        print('Resume from a snapshot')
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
def main():
    """Train a BiDAF question-answering model on SQuAD.

    Command-line flags configure model dimensions and the training
    schedule. ``--gpu`` takes a comma-separated device list; one entry
    runs a StandardUpdater, two entries run a data-parallel
    ParallelUpdater.
    """
    parser = argparse.ArgumentParser()
    pa = parser.add_argument  # shorthand alias for the many flags below
    pa('--gpu', type=str, default='0')
    pa('--epoch', type=int, default=20)
    pa('--debug_mode', action='store_true')
    pa('--resume', type=str, default='')
    pa('--data_dir', type=str, default='../bi-att-flow/data/squad_nonsplit')
    pa('--ckpt_path', type=str, default='logs')
    pa('--log_path', type=str, default='logs')
    pa('--batch_size', type=int, default=60)
    pa('--display_step', type=int, default=50)
    pa('--eval_step', type=int, default=500)
    pa('--init_lr', type=float, default=0.5)
    pa('--optimizer', type=str, default='adadelta')
    pa('--decay_rate', type=float, default=0.999)
    pa('--dropout_rate', type=float, default=0.2)
    pa('--no_ema', action='store_true')
    pa('--hidden_size', type=int, default=100)
    pa('--word_emb_dim', type=int, default=100)
    pa('--char_emb_dim', type=int, default=8)
    pa('--char_conv_n_kernel', type=int, default=100)
    pa('--char_conv_height', type=int, default=5)
    pa('--char_out_dim', type=int, default=100)
    pa('--highway_n_layer', type=int, default=2)
    pa('--word_count_th', type=int, default=10)
    pa('--char_count_th', type=int, default=50)
    pa('--sent_size_th', type=int, default=195)  # 400
    pa('--para_size_th', type=int, default=256)
    pa('--num_sents_th', type=int, default=8)
    pa('--ques_size_th', type=int, default=30)
    pa('--word_size_th', type=int, default=16)
    config = parser.parse_args()
    print(json.dumps(config.__dict__, indent=4))

    train_data, test_data, vocab = load_dataset(config)
    # update_config augments the parsed flags with dataset-derived values.
    config = update_config(config, [train_data, test_data], vocab)
    # '--gpu 0,1' -> [0, 1]
    config.gpu = [int(g) for g in config.gpu.split(',')]
    # Encoder input: concatenation of word and char-derived embeddings.
    config.enc_dim = config.word_emb_dim + config.char_out_dim

    model = BiDAF(config)
    if config.resume:
        serializers.load_npz(config.resume, model)

    # optimizer
    if config.optimizer == 'adam':
        optimizer = chainer.optimizers.Adam(0.001)
    else:
        optimizer = AdaDeltaWithLearningRate(lr=config.init_lr, eps=1e-08)
    optimizer.setup(model)
    # The word-embedding matrix is excluded from parameter updates.
    model.word_emb.W.update_rule.enabled = False

    # iterator
    train_iter = MultiprocessIterator(train_data,
                                      config.batch_size,
                                      repeat=True,
                                      shuffle=True)
    test_iter = MultiprocessIterator(test_data,
                                     config.batch_size,
                                     repeat=False,
                                     shuffle=False)

    # updater, trainer
    # NOTE(review): only 1- and 2-GPU lists are handled; with more than
    # two entries in --gpu neither branch runs and `updater` is
    # undefined (NameError below) — confirm intended limit.
    if len(config.gpu) == 2:
        multi_devices = {
            'main': int(config.gpu[0]),
            'second': int(config.gpu[1])
        }
        updater = training.ParallelUpdater(train_iter,
                                           optimizer,
                                           converter=squad_converter,
                                           devices=multi_devices)
    elif len(config.gpu) == 1:
        if config.gpu[0] >= 0:
            model.to_gpu(config.gpu[0])
        updater = training.StandardUpdater(train_iter,
                                           optimizer,
                                           converter=squad_converter,
                                           device=config.gpu[0])
    trainer = training.Trainer(updater, (config.epoch, 'epoch'),
                               out=config.log_path)

    evaluator = BiDAFEvaluator(test_iter,
                               model,
                               config,
                               converter=squad_converter,
                               device=config.gpu[0])
    evaluator.name = 'val'  # metrics are reported under the 'val/' prefix

    iter_per_epoch = len(train_data) // config.batch_size
    print('Iter/epoch =', iter_per_epoch)

    log_trigger = (min(config.display_step, iter_per_epoch // 2), 'iteration')
    # Evaluate per --eval_step iterations, or once per epoch when an
    # epoch is shorter than the evaluation step.
    eval_trigger = (config.eval_step,
                    'iteration') if iter_per_epoch > config.eval_step else (
                        1, 'epoch')
    # Snapshot only when validation F1 reaches a new maximum.
    record_trigger = training.triggers.MaxValueTrigger('val/main/f1',
                                                       eval_trigger)
    trainer.extend(extensions.snapshot_object(
        model, 'model_epoch_{.updater.epoch}.npz'),
                   trigger=record_trigger)
    trainer.extend(evaluator, trigger=eval_trigger)
    trainer.extend(
        extensions.LogReport(trigger=log_trigger, log_name='iteration.log'))
    trainer.extend(
        extensions.LogReport(trigger=eval_trigger, log_name='epoch.log'))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'iteration', 'main/loss', 'main/match', 'main/f1',
            'val/main/loss', 'val/main/match', 'val/main/f1', 'elapsed_time'
        ]))
    trainer.run()
def main():
    """Data-parallel MNIST training on two GPUs.

    Mirrors train_mnist.py; the only difference is that the
    StandardUpdater is replaced with a ParallelUpdater splitting each
    minibatch across --gpu0 and --gpu1. See train_mnist.py for details.
    """
    arg_parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    add = arg_parser.add_argument
    add('--batchsize', '-b', type=int, default=400,
        help='Number of images in each mini-batch')
    add('--epoch', '-e', type=int, default=20,
        help='Number of sweeps over the dataset to train')
    add('--gpu0', '-g', type=int, default=0, help='First GPU ID')
    add('--gpu1', '-G', type=int, default=1, help='Second GPU ID')
    add('--out', '-o', default='result_parallel',
        help='Directory to output the result')
    add('--resume', '-r', default='',
        help='Resume the training from snapshot')
    add('--unit', '-u', type=int, default=1000, help='Number of units')
    args = arg_parser.parse_args()

    print('GPU: {}, {}'.format(args.gpu0, args.gpu1))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    chainer.cuda.get_device(args.gpu0).use()

    classifier = L.Classifier(train_mnist.MLP(args.unit, 10))
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(classifier)

    train, test = chainer.datasets.get_mnist()
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    # The device named 'main' acts as the master; any other key name
    # (here 'second') is arbitrary.
    device_map = {'main': args.gpu0, 'second': args.gpu1}
    updater = training.ParallelUpdater(train_iter, optimizer,
                                       devices=device_map)

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(extensions.Evaluator(test_iter, classifier,
                                        device=args.gpu0))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.LogReport())
    report_keys = ['epoch', 'main/loss', 'validation/main/loss',
                   'main/accuracy', 'validation/main/accuracy',
                   'elapsed_time']
    trainer.extend(extensions.PrintReport(report_keys))
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Train an SSD hand detector, driven entirely by a config.ini file.

    Reads hyper-parameters via utils.get_config, builds the model and
    datasets, and runs a (possibly multi-GPU) Chainer trainer. All
    artifacts, including a copy of the config file, are stored under
    <result_dir>/detector.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--config_path", type=str, default="config.ini")
    parser.add_argument("--resume")
    args = parser.parse_args()

    config = configparser.ConfigParser()
    config.read(args.config_path, "UTF-8")
    train_param = utils.get_config(config)

    # cuDNN tuning: autotune selects the fastest convolution algorithms.
    chainer.global_config.autotune = True
    chainer.cuda.set_max_workspace_size(11388608)
    chainer.config.cudnn_fast_batch_normalization = True

    logger.info("> set up devices")
    if chainer.backends.cuda.available:
        devices = utils.setup_devices(train_param["gpus"])
    else:
        # cpu run
        devices = {"main": -1}
    logger.info("> set devices {}".format(devices))
    utils.set_random_seed(devices, train_param["seed"])

    # get dataset
    logger.info("> get dataset")
    train, test = select_dataset(config, return_data=["train_set", "val_set"])
    logger.info("> size of train {}".format(len(train)))
    logger.info("> size of test {}".format(len(test)))

    # create result dir and copy the config file there for reproducibility
    result = config["output_path"]["result_dir"]
    logger.info("> store file to result dir {}".format(result))
    utils.create_result_dir(result)
    destination = os.path.join(result, "detector")
    logger.info("> store config.ini to {}".format(
        os.path.join(destination, "config.ini")))
    if not os.path.exists(destination):
        os.makedirs(destination)
    shutil.copy(args.config_path, os.path.join(destination, "config.ini"))

    # load model
    logger.info("> load model")
    model = utils.create_ssd_model(train_param)
    model.use_preset("evaluate")
    train_chain = MultiboxTrainChain(model, beta=4)

    logger.info("> transform dataset")
    train = TransformDataset(
        train, Transform(model.coder, model.insize, model.mean, train=True))
    train_iter = chainer.iterators.MultiprocessIterator(
        train,
        train_param["batchsize"],
        n_processes=train_param["num_process"])
    test = TransformDataset(
        test, Transform(model.coder, model.insize, model.mean, train=False))
    test_iter = chainer.iterators.MultiprocessIterator(
        test,
        train_param["batchsize"],
        repeat=False,
        shuffle=False,
        n_processes=4)

    # initial lr is set to 1e-3 by ExponentialShift
    logger.info("> set up optimizer")
    optimizer = chainer.optimizers.MomentumSGD(lr=train_param["learning_rate"])
    # optimizer = chainer.optimizers.RMSprop(lr=train_param["learning_rate"])
    optimizer.setup(train_chain)
    # Biases ('b') get doubled gradients; all other parameters get
    # weight decay instead.
    for param in train_chain.params():
        if param.name == "b":
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    # ParallelUpdater also covers the single-device {"main": -1} CPU case.
    updater = training.ParallelUpdater(train_iter, optimizer, devices=devices)
    trainer = training.Trainer(
        updater,
        (train_param["train_iter"], "iteration"),
        destination,
    )
    # Step-wise LR decay at the iterations listed in train_param["schedule"].
    trainer.extend(
        extensions.ExponentialShift("lr",
                                    0.1,
                                    init=train_param["learning_rate"]),
        trigger=triggers.ManualScheduleTrigger(train_param["schedule"],
                                               "iteration"))

    # set current device to devices["main"]
    # with chainer.cuda.Device(devices["main"]):
    eval_interval = 500, "iteration"
    logger.info("setup evaluator {}".format(train_param["hand_class"]))
    trainer.extend(
        DetectionCOCOEvaluator(
            test_iter,
            model,
            device=devices["main"],
            label_names=train_param["hand_class"],
        ),
        trigger=eval_interval,
    )

    log_interval = 100, "iteration"
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        "epoch",
        "iteration",
        "lr",
        "main/loss",
        "main/loss/loc",
        "main/loss/conf",
        "validation/main/map",
    ]),
                   trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=100))

    # Keep a full trainer snapshot and a bare model file whenever
    # validation mAP hits a new maximum.
    trainer.extend(extensions.snapshot(filename="best_snapshot"),
                   trigger=MaxValueTrigger("validation/main/map",
                                           trigger=eval_interval))
    trainer.extend(extensions.snapshot_object(model, filename="bestmodel.npz"),
                   trigger=MaxValueTrigger("validation/main/map",
                                           trigger=eval_interval))

    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(["main/loss", "validation/main/loss"],
                                  x_key="iteration",
                                  file_name="loss.png"))
        trainer.extend(
            extensions.PlotReport(["main/accuracy/map", "validation/main/map"],
                                  x_key="iteration",
                                  file_name="accuracy.png"))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    logger.info("> run trainer")
    trainer.run()
def main():
    """Train AEFINetConcat (frame interpolation) on the General-100 set.

    Always trains data-parallel on two GPUs (--gpu0/--gpu1) via a
    ParallelUpdater, then saves the final model and its construction
    parameters under <ROOT_PATH>/models.
    """
    # command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.001,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=100,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu0', '-g', type=int, default=0,
                        help='GPU1 ID (negative value indicates CPU)')
    parser.add_argument('--gpu1', '-G', type=int, default=2,
                        help='GPU2 ID (negative value indicates CPU)')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--iter_parallel', '-p', action='store_true',
                        default=False, help='loading dataset from disk')
    parser.add_argument('--opt', '-o', type=str,
                        choices=('adam', 'sgd'), default='adam')
    parser.add_argument('--fsize', '-f', type=int, default=5)
    parser.add_argument('--ch', '-c', type=int, default=4)
    args = parser.parse_args()

    # print the training parameters
    print("-=Learning Parameter=-")
    print("# Max Epochs: {}".format(args.epoch))
    print("# Batch Size: {}".format(args.batchsize))
    print("# Learning Rate: {}".format(args.learnrate))
    print("# Optimizer Method: {}".format(args.opt))
    print("# Filter Size: {}".format(args.fsize))
    print("# Channel Scale: {}".format(args.ch))
    print('# Train Dataet: General 100')
    if args.iter_parallel:
        print("# Data Iters that loads in Parallel")
    print("\n")

    # output directory; the hyper-parameters are encoded in its name
    model_dir_name = 'AEFINet_concat_parallel_opt_{}_ch_{}_fsize_{}'.format(
        args.opt, args.ch, args.fsize)
    outdir = path.join(ROOT_PATH, 'results', 'FI', 'AEFINet', model_dir_name)
    if not path.exists(outdir):
        os.makedirs(outdir)
    # dump the raw argument values next to the results
    with open(path.join(outdir, 'arg_param.txt'), 'w') as f:
        for k, v in args.__dict__.items():
            f.write('{}:{}\n'.format(k, v))

    # loading dataset: either streamed from disk or fully in memory
    print('# loading dataet(General100_train, General100_test) ...')
    if args.iter_parallel:
        train = ds.SequenceDataset(dataset='train')
        test = ds.SequenceDataset(dataset='test')
    else:
        train = ds.SequenceDatasetOnMem(dataset='train')
        test = ds.SequenceDatasetOnMem(dataset='test')

    chainer.cuda.get_device_from_id(args.gpu0).use()

    # prepare model; ParallelUpdater copies it to each device itself,
    # hence no explicit to_gpu() call here
    model = N.AEFINetConcat(f_size=args.fsize, ch=args.ch)
    # model.to_gpu()

    # setup optimizer (args.opt is restricted to 'adam'/'sgd' by argparse)
    if args.opt == 'adam':
        optimizer = chainer.optimizers.Adam(alpha=args.learnrate)
    elif args.opt == 'sgd':
        optimizer = chainer.optimizers.MomentumSGD(lr=args.learnrate,
                                                   momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001))

    # setup iterators (multiprocess loading when streaming from disk)
    if args.iter_parallel:
        train_iter = chainer.iterators.MultiprocessIterator(train,
                                                            args.batchsize,
                                                            n_processes=8)
        test_iter = chainer.iterators.MultiprocessIterator(test,
                                                           args.batchsize,
                                                           repeat=False,
                                                           shuffle=False,
                                                           n_processes=8)
    else:
        train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
        test_iter = chainer.iterators.SerialIterator(test,
                                                     args.batchsize,
                                                     repeat=False,
                                                     shuffle=False)

    # setup trainer: two-GPU data-parallel updater
    updater = training.ParallelUpdater(
        train_iter,
        optimizer,
        devices={
            'main': args.gpu0,
            'second': args.gpu1
        },
    )
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=outdir)

    # eval test data
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu0))
    # dump loss graph
    trainer.extend(extensions.dump_graph('main/loss'))
    # lr shift: decay the step size x0.1 every 100 epochs
    if args.opt == 'sgd':
        trainer.extend(extensions.ExponentialShift("lr", 0.1),
                       trigger=(100, 'epoch'))
    elif args.opt == 'adam':
        trainer.extend(extensions.ExponentialShift("alpha", 0.1),
                       trigger=(100, 'epoch'))
    # save snapshot every 10 epochs
    trainer.extend(extensions.snapshot(), trigger=(10, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_snapshot_{.updater.epoch}'),
                   trigger=(10, 'epoch'))
    # log report
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.observe_lr(), trigger=(1, 'epoch'))
    # plot loss graph
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'], 'epoch',
                              file_name='loss.png'))
    # plot PSNR graph
    trainer.extend(
        extensions.PlotReport(['main/PSNR', 'validation/main/PSNR'], 'epoch',
                              file_name='PSNR.png'))
    # print info
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/PSNR',
            'validation/main/PSNR', 'lr', 'elapsed_time'
        ]))
    # print progbar
    trainer.extend(extensions.ProgressBar())
    # [ChainerUI] enable to send commands from ChainerUI
    trainer.extend(CommandsExtension())
    # [ChainerUI] save 'args' to show experimental conditions
    save_args(args, outdir)

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()

    # save final model and the parameters needed to re-instantiate it
    model_outdir = path.join(ROOT_PATH, 'models', model_dir_name)
    if not path.exists(model_outdir):
        os.makedirs(model_outdir)
    model_name = 'AEFINet_concat_opt_{}_ch_{}_fsize_{}.npz'.format(
        args.opt, args.ch, args.fsize)
    chainer.serializers.save_npz(path.join(model_outdir, model_name), model)

    model_parameter = {
        'name': 'AEFINetConcat',
        'parameter': {
            'f_size': args.fsize,
            'ch': args.ch
        }
    }
    with open(path.join(model_outdir, 'model_parameter.json'), 'w') as f:
        json.dump(model_parameter, f)
args.translate, args.translate_range, args.min_dim, args.coord_normalize, args.gcn, args.n_joints, args.fname_index, args.joint_index, args.symmetric_joints, args.ignore_label) train_iter = iterators.MultiprocessIterator(train_dataset, args.batchsize) test_iter = iterators.MultiprocessIterator(test_dataset, args.batchsize, repeat=False, shuffle=False) gpus = [int(i) for i in args.gpus.split(',')] devices = {'main': gpus[0]} if len(gpus) > 2: for gid in gpus[1:]: devices.update({'gpu{}'.format(gid): gid}) updater = training.ParallelUpdater(train_iter, opt, devices=devices) interval = (args.snapshot, 'epoch') trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=result_dir) trainer.extend(extensions.dump_graph('main/loss')) # Save parameters and optimization state trainer.extend(extensions.snapshot_object(model, 'epoch-{.updater.epoch}.model'), trigger=interval) trainer.extend(extensions.snapshot_object(opt, 'epoch-{.updater.epoch}.state'), trigger=interval) trainer.extend(extensions.snapshot(), trigger=interval) if args.opt == 'MomentumSGD' or args.opt == 'AdaGrad':
def main(args):
    """Train a DenseNet on CIFAR-10/100 (normalized variant).

    Uses channel-statistics normalization, a hand-rolled LR schedule
    (x0.1 at epochs 151 and 226) and, when several GPUs are given,
    data-parallel training via ParallelUpdater.
    """
    # The network depth must decompose evenly into `block` dense blocks.
    assert ((args.depth - args.block - 1) % args.block == 0)
    # Integer (floor) division: under Python 3 the original `/` yielded a
    # float, but DenseNet expects an integer per-block layer count.
    n_layer = (args.depth - args.block - 1) // args.block

    if args.dataset == 'cifar10':
        mean = numpy.asarray((125.3, 123.0, 113.9))  # from fb.resnet.torch
        # Did the std data computed from 0 padding images?
        std = numpy.asarray((63.0, 62.1, 66.7))
        train, test = dataset.EXget_cifar10(scale=255, mean=mean, std=std)
        n_class = 10
    elif args.dataset == 'cifar100':
        mean = numpy.asarray((129.3, 124.1, 112.4))  # from fb.resnet.torch
        std = numpy.asarray((68.2, 65.4, 70.4))
        train, test = dataset.EXget_cifar100(scale=255, mean=mean, std=std)
        n_class = 100
    elif args.dataset == 'SVHN':
        raise NotImplementedError()

    train = PreprocessedDataset(train, random=True)
    test = PreprocessedDataset(test)
    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)
    test_iter = chainer.iterators.MultiprocessIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    model = chainer.links.Classifier(
        DenseNet(n_layer, args.growth_rate, n_class, args.drop_ratio, 16,
                 args.block))
    if args.init_model:
        serializers.load_npz(args.init_model, model)

    import EXoptimizers
    # LR is divided by the GPU count because ParallelUpdater accumulates
    # gradients from every device.
    optimizer = EXoptimizers.originalNesterovAG(lr=args.lr / len(args.gpus),
                                                momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))

    devices = {'main': args.gpus[0]}
    if len(args.gpus) > 1:
        for gid in args.gpus[1:]:
            devices['gpu%d' % gid] = gid
    updater = training.ParallelUpdater(train_iter, optimizer, devices=devices)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.dir)
    val_interval = (1, 'epoch')
    log_interval = (1, 'epoch')

    def lr_shift():
        # DenseNet specific! Drop the learning rate x0.1 at epochs 151/226.
        if updater.epoch == 151 or updater.epoch == 226:
            optimizer.lr *= 0.1
        return optimizer.lr

    trainer.extend(Evaluator(test_iter, model, device=args.gpus[0]),
                   trigger=val_interval)
    trainer.extend(extensions.observe_value('lr', lambda _: lr_shift()),
                   trigger=(1, 'epoch'))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot_object(
        model, 'epoch_{.updater.epoch}.model'), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        optimizer, 'epoch_{.updater.epoch}.state'), trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    start_time = time.time()
    trainer.extend(extensions.observe_value(
        'time', lambda _: time.time() - start_time), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'time', 'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'lr',
    ]), trigger=log_interval)
    trainer.extend(extensions.observe_value(
        'graph', lambda _: create_fig(args.dir)), trigger=(2, 'epoch'))
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.run()
def main(args):
    """Train a DenseNet on CIFAR-10/100 with mean-image preprocessing.

    Computes the per-pixel training mean, builds the model, and runs a
    ParallelUpdater-based trainer with an ExponentialShift LR schedule.
    """
    # The network depth must decompose evenly into `block` dense blocks.
    assert ((args.depth - args.block - 1) % args.block == 0)
    # Integer (floor) division: under Python 3 the original `/` yielded a
    # float, but DenseNet expects an integer per-block layer count.
    n_layer = (args.depth - args.block - 1) // args.block

    if args.dataset == 'cifar10':
        train, test = cifar.get_cifar10()
        n_class = 10
    elif args.dataset == 'cifar100':
        train, test = cifar.get_cifar100()
        n_class = 100
    elif args.dataset == 'SVHN':
        raise NotImplementedError()

    # Per-pixel mean of the training images, subtracted during preprocessing.
    mean = numpy.zeros((3, 32, 32), dtype=numpy.float32)
    for image, _ in train:
        mean += image / len(train)

    train = PreprocessedDataset(train, mean, random=True)
    test = PreprocessedDataset(test, mean)
    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)
    test_iter = chainer.iterators.MultiprocessIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    model = chainer.links.Classifier(
        DenseNet(n_layer, args.growth_rate, n_class, args.drop_ratio, 16,
                 args.block))
    if args.init_model:
        serializers.load_npz(args.init_model, model)

    # LR is divided by the GPU count because ParallelUpdater accumulates
    # gradients from every device.
    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr / len(args.gpus),
                                               momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))

    devices = {'main': args.gpus[0]}
    # `> 1` (not `> 2`): the previous condition silently dropped the
    # second device when exactly two GPUs were given, matching neither
    # the sibling implementation nor the user's intent.
    if len(args.gpus) > 1:
        for gid in args.gpus[1:]:
            devices['gpu%d' % gid] = gid
    updater = training.ParallelUpdater(train_iter, optimizer, devices=devices)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.dir)
    val_interval = (1, 'epoch')
    log_interval = (1, 'epoch')

    # Evaluate on a copy flagged as non-training (disables e.g. dropout).
    eval_model = model.copy()
    eval_model.train = False

    trainer.extend(extensions.Evaluator(test_iter,
                                        eval_model,
                                        device=args.gpus[0]),
                   trigger=val_interval)
    trainer.extend(extensions.ExponentialShift('lr', args.lr_decay_ratio),
                   trigger=(args.lr_decay_freq, 'epoch'))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot_object(model,
                                              'epoch_{.updater.epoch}.model'),
                   trigger=val_interval)
    trainer.extend(extensions.snapshot_object(optimizer,
                                              'epoch_{.updater.epoch}.state'),
                   trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    start_time = time.time()
    trainer.extend(extensions.observe_value(
        'time', lambda _: time.time() - start_time), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'time', 'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'lr',
    ]), trigger=log_interval)
    trainer.extend(extensions.observe_value(
        'graph', lambda _: create_fig(args.dir)), trigger=(2, 'epoch'))
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.run()
batchsize = 100 train_iter = iterators.SerialIterator(train_data, batchsize) test_iter = iterators.SerialIterator(test_data, batchsize, repeat = False, shuffle = False) # import pdb; pdb.set_trace() # setup model model = LSTM(IN_UNITS, HIDDEN_UNITS, OUT_UNITS) # setup optimizer optimizer = optimizers.Adam() optimizer.setup(model) start = time.time() # updater = training.StandardUpdater(train_iter, optimizer, MyConverter) updater = training.ParallelUpdater(train_iter, optimizer, MyConverter, devices={'main': -1, 'second': -2}) trainer = training.Trainer(updater, (20, 'epoch'), out='result') trainer.extend(extensions.LogReport()) trainer.extend(extensions.dump_graph('main/loss')) trainer.extend(extensions.observe_lr()) trainer.extend(extensions.Evaluator(test_iter, model, MyConverter), name= 'val') trainer.extend(extensions.PrintReport(['epoch', 'main/loss', 'val/main/loss', 'elapsed_time', 'lr'])) trainer.extend(extensions.PlotReport(['main/loss', 'val/main/loss'], x_key = 'epoch', file_name= 'loss.png')) trainer.extend(extensions.ProgressBar()) trainer.run() end = time.time() print("{}[sec]".format(end - start))
def main():
    """Train, or interactively sample from, a character-level RNN LM.

    Driven entirely by FLAGS: loads a tab-separated vocab file and an HDF5
    id-matrix dataset, builds the model, optionally restores model/trainer
    snapshots, then either runs an interactive sampling loop (demo_mode) or
    trains on CPU / one GPU / several GPUs.
    """
    FLAGS(sys.argv)

    # 0. Load vocabulary and dataset.
    # FIX: use a context manager so the vocab file handle is not leaked.
    with open(FLAGS.vocab_file) as vocab_f:
        char_list = [line.strip().split('\t')[0] for line in vocab_f]
    # Two aliases kept on purpose: `char2id` is captured by func_mask below,
    # `char_to_id` is used in the demo loop.
    char_to_id = char2id = {c: i for i, c in enumerate(char_list)}
    h5f = h5py.File(path.normpath(FLAGS.data_file), 'r')
    data = h5f['data'][:]
    train_data = data
    print_len = len(train_data[0])
    h5f.close()
    n, max_len = data.shape
    # +1: id 0 is reserved (presumably padding — TODO confirm); dataset ids
    # are shifted by +1 relative to char_list indices.
    charset_size = len(char_list) + 1
    save_dir = path.normpath(FLAGS.save_dir)

    # 1. Build model.
    if FLAGS.model == 'rnnlm':
        model = Decoder(charset_size=charset_size,
                        hidden_size=FLAGS.hidden_size,
                        n_layers=FLAGS.n_layers,
                        dropout=FLAGS.dropout)
    gpu_id_list = [int(_) for _ in FLAGS.gpu_id_list.split(',')
                   ] if len(FLAGS.gpu_id_list) > 0 else []
    if len(gpu_id_list) > 0:
        chainer.cuda.get_device_from_id(gpu_id_list[0]).use()
        model.to_gpu()
    load_model = path.join(save_dir, FLAGS.load_model)
    if os.path.exists(load_model):
        print('load model snapshot from %s' % load_model)
        serializers.load_npz(load_model, model)

    from lv import ZiFeature, calc_mask_5
    zf = ZiFeature()

    def func_mask(ys):
        # offset=1 matches the +1 id shift applied when the dataset was built.
        return calc_mask_5(zf=zf, prefix=ys, char_list=char_list,
                           char2id=char2id, offset=1)

    if FLAGS.demo_mode:
        # BUG FIX: this string literal was broken across a physical line
        # (a syntax error for a single-quoted string); rejoined on one line.
        print('demo starts. enter prefix or `exit` for exiting.')
        while True:
            line = sys.stdin.readline().strip()
            if line == 'exit':
                break
            # Map the typed prefix to dataset ids, dropping unknown chars.
            guide_ids = [
                char_to_id[c] + 1 for c in line.strip() if c in char_to_id
            ]
            for t in [1., 1.5, 2., 2.5, 3.]:
                ys = model.sample(batch_size=5,
                                  use_random=True,
                                  temperature=t,
                                  max_len=print_len,
                                  guide_ids=guide_ids,
                                  func_mask=func_mask)
                for y in ys:
                    print('[t=%.3f] %s' % (t, gs(y, char_list)))
                print('-' * print_len)
        return

    # 2. Training setup.
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    train_iter = chainer.iterators.SerialIterator(train_data,
                                                  FLAGS.batch_size)
    if len(gpu_id_list) == 0:
        updater = training.StandardUpdater(train_iter, optimizer, device=-1)
    elif len(gpu_id_list) == 1:
        gpu_id = gpu_id_list[0]
        updater = training.StandardUpdater(train_iter, optimizer,
                                           device=gpu_id)
    else:
        # First listed GPU becomes 'main'; each remaining one gets a unique
        # 'second<id>' label as ParallelUpdater requires distinct names.
        devices = {('main' if index == 0 else ('second%d' % gpu_id)): gpu_id
                   for index, gpu_id in enumerate(gpu_id_list)}
        print('multiple gpu training with devices = %s' % devices)
        updater = training.ParallelUpdater(
            train_iter,
            optimizer,
            devices=devices,
        )
    trainer = training.Trainer(updater,
                               stop_trigger=(FLAGS.n_epoch, 'epoch'),
                               out=save_dir)
    trainer.extend(
        extensions.LogReport(trigger=(FLAGS.log_interval, 'iteration')))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'main/loss', 'main/perp', 'elapsed_time']),
        trigger=(FLAGS.log_interval, 'iteration'))
    # Keep both per-iteration and "latest" snapshots for trainer and model.
    trainer.extend(
        extensions.snapshot(
            filename='trainer_snapshot_iter_{.updater.iteration}'))
    trainer.extend(
        extensions.snapshot(filename='trainer_snapshot_iter_latest'))
    trainer.extend(
        extensions.snapshot_object(
            target=model,
            filename='model_snapshot_iter_{.updater.iteration}'))
    trainer.extend(
        extensions.snapshot_object(target=model,
                                   filename='model_snapshot_iter_latest'))
    trainer.extend(extensions.ProgressBar())

    if FLAGS.show_sample:

        @chainer.training.make_extension()
        def sample(trainer):
            # Periodically print random- and max-decoded samples for eyeballing.
            for temperature in [1.0, 1.3, 1.6, 1.9, 2.1]:
                print('sample (use random, t=%.2f):' % temperature)
                ys = model.sample(batch_size=2,
                                  use_random=True,
                                  temperature=temperature,
                                  max_len=print_len,
                                  func_mask=func_mask)
                for y in ys:
                    print('%s' % (gs(y, char_list)))
                print('-' * print_len)
            print('sample (use max):')
            ys = model.sample(batch_size=1,
                              use_random=False,
                              func_mask=func_mask)
            for y in ys:
                print('%s' % (gs(y, char_list)))
            print('-' * print_len)

        trainer.extend(sample, trigger=(1, 'epoch'))

    load_trainer = path.join(save_dir, FLAGS.load_trainer)
    if os.path.exists(load_trainer):
        print('load trainer snapshot from %s' % load_trainer)
        serializers.load_npz(load_trainer, trainer)
    print('start training')
    trainer.run()