def test_savefun_and_writer_exclusive(self):
    """Passing both ``savefun`` and ``writer`` must raise TypeError."""
    def never_called(*args, **kwargs):
        assert False

    simple_writer = extensions.snapshot_writers.SimpleWriter()
    with pytest.raises(TypeError):
        extensions.snapshot(savefun=never_called, writer=simple_writer)

    dummy_trainer = mock.MagicMock()
    with pytest.raises(TypeError):
        extensions.snapshot_object(
            dummy_trainer, savefun=never_called, writer=simple_writer)
def test_save_file(self):
    """snapshot_object with an explicit SimpleWriter must create the file."""
    writer = extensions.snapshot_writers.SimpleWriter()
    ext = extensions.snapshot_object(
        self.trainer, 'myfile.dat', writer=writer)
    ext(self.trainer)
    self.assertTrue(os.path.exists('myfile.dat'))
def test_clean_up_tempdir(self):
    """After saving, no temporary 'tmpmyfile.dat*' files may remain."""
    ext = extensions.snapshot_object(self.trainer, 'myfile.dat')
    ext(self.trainer)
    remaining = sum(1 for name in os.listdir('.')
                    if name.startswith('tmpmyfile.dat'))
    self.assertEqual(remaining, 0)
def train():
    """Train the SuperResolution model, snapshotting periodically and
    saving the final weights to 'model.hdf5'."""
    model = SuperResolution()
    if DEVICE >= 0:
        # Select and verify the configured GPU, then move the model there.
        chainer.cuda.get_device_from_id(DEVICE).use()
        chainer.cuda.check_cuda_available()
        print("USEDEVICE", DEVICE)
        model.to_gpu()
    images = collect_train_patch('train')
    train_iter = iterators.SerialIterator(images, BATCH_SIZE, shuffle=True)
    optimizer = optimizers.Adam()
    optimizer.setup(model)
    updater = SRUpdater(train_iter, optimizer, device=DEVICE)
    snapshot_interval = (500, 'epoch')
    trainer = training.Trainer(updater, (10000, 'epoch'), out='result')
    # Snapshot the full trainer state (for resuming) every 500 epochs...
    trainer.extend(extensions.snapshot(
        filename='snapshot_epoch_{.updater.epoch}.npz'),
        trigger=snapshot_interval)
    trainer.extend(extensions.ProgressBar())
    # ...and the bare model weights on the same schedule.
    trainer.extend(extensions.snapshot_object(
        model, 'model_epoch_{.updater.epoch}.npz'),
        trigger=snapshot_interval)
    if RESUME:
        # Resume from a snapshot
        chainer.serializers.load_npz('result/snapshot_epoch_25.npz', trainer)
    trainer.run()
    chainer.serializers.save_hdf5('model.hdf5', model)
def train():
    """Adversarially train the Generator/Discriminator pair on the images
    found in the 'train/' directory."""
    model_gen = Generator()
    model_dis = Discriminator()
    if DEVICE >= 0:
        # Use the configured GPU and move both models onto it.
        chainer.cuda.get_device_from_id(DEVICE).use()
        chainer.cuda.check_cuda_available()
        model_gen.to_gpu(DEVICE)
        model_dis.to_gpu(DEVICE)
    images = []
    fs = os.listdir('train')
    for f in fs:
        # Load each image as RGB, resize to IMAGE_SIZE, scale to [0, 1],
        # and transpose to channels-first (C, H, W) as Chainer expects.
        img = Image.open(
            'train/'+f).convert('RGB').resize((IMAGE_SIZE, IMAGE_SIZE))
        hpix = np.array(img, dtype=np.float32)/255.0
        hpix = hpix.transpose(2, 0, 1)
        images.append(hpix)
    train_iter = iterators.SerialIterator(images, BATCH_SIZE, shuffle=True)
    optimizer_gen = optimizers.Adam(alpha=0.0002, beta1=0.5)
    optimizer_gen.setup(model_gen)
    optimizers_dis = optimizers.Adam(alpha=0.0002, beta1=0.5)
    optimizers_dis.setup(model_dis)
    updater = Updater(
        train_iter,
        {'opt_gen': optimizer_gen, 'opt_dis': optimizers_dis},
        device=DEVICE)
    trainer = training.Trainer(updater, (100000, 'epoch'), out='result')
    trainer.extend(extensions.ProgressBar())
    snapshot_interval = (5000, 'epoch')
    # Full trainer state (for resuming) plus both model weights.
    trainer.extend(extensions.snapshot(
        filename='snapshot_epoch_{.updater.epoch}.npz'),
        trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        model_gen, 'model_gen_epoch_{.updater.epoch}.npz'),
        trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        model_dis, 'model_dis_epoch_{.updater.epoch}.npz'),
        trigger=snapshot_interval)
    if RESUME:
        # Resume full trainer state from an earlier snapshot.
        chainer.serializers.load_npz('result/snapshot_epoch_26797.npz', trainer)
    trainer.run()
def get_trainer(optimizer, iter_train, iter_valid, iter_valid_raw,
                class_names, args):
    """Build a Chainer Trainer wired with FCN reporting, visualization,
    evaluation, best-model snapshot, and plotting extensions.

    The model to train is taken from ``optimizer.target``.
    """
    model = optimizer.target

    updater = chainer.training.StandardUpdater(
        iter_train, optimizer, device=args.gpu)

    trainer = chainer.training.Trainer(
        updater, (args.max_iteration, 'iteration'), out=args.out)

    trainer.extend(fcn.extensions.ParamsReport(args.__dict__))
    trainer.extend(extensions.ProgressBar(update_interval=5))
    trainer.extend(extensions.LogReport(
        trigger=(args.interval_print, 'iteration')))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'elapsed_time',
         'main/loss', 'validation/main/miou']))

    def pred_func(x):
        # The model stores its prediction in ``model.score`` as a side
        # effect of __call__; return that score map.
        model(x)
        return model.score

    trainer.extend(
        fcn.extensions.SemanticSegmentationVisReport(
            pred_func, iter_valid_raw,
            transform=fcn.datasets.transform_lsvrc2012_vgg16,
            class_names=class_names, device=args.gpu, shape=(4, 2)),
        trigger=(args.interval_eval, 'iteration'))

    trainer.extend(
        chainercv.extensions.SemanticSegmentationEvaluator(
            iter_valid, model, label_names=class_names),
        trigger=(args.interval_eval, 'iteration'))

    # Keep a single 'model_best.npz', overwritten whenever validation
    # mIoU reaches a new maximum.
    trainer.extend(extensions.snapshot_object(
        target=model, filename='model_best.npz'),
        trigger=chainer.training.triggers.MaxValueTrigger(
            key='validation/main/miou',
            trigger=(args.interval_eval, 'iteration')))

    assert extensions.PlotReport.available()
    trainer.extend(extensions.PlotReport(
        y_keys=['main/loss'], x_key='iteration',
        file_name='loss.png', trigger=(args.interval_print, 'iteration')))
    trainer.extend(extensions.PlotReport(
        y_keys=['validation/main/miou'], x_key='iteration',
        file_name='miou.png', trigger=(args.interval_print, 'iteration')))

    return trainer
def main(model):
    """Train ``model`` on the buzzer dataset for 20 epochs, keeping the
    checkpoint with the best validation accuracy as 'buzzer.npz'."""
    train = read_data(BUZZER_TRAIN_FOLD)
    valid = read_data(BUZZER_DEV_FOLD)
    print('# train data: {}'.format(len(train)))
    print('# valid data: {}'.format(len(valid)))
    train_iter = chainer.iterators.SerialIterator(train, 64)
    valid_iter = chainer.iterators.SerialIterator(
        valid, 64, repeat=False, shuffle=False)

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(1e-4))

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, converter=convert_seq, device=0)
    trainer = training.Trainer(updater, (20, 'epoch'), out=model.model_dir)

    trainer.extend(extensions.Evaluator(
        valid_iter, model, converter=convert_seq, device=0))

    record_trigger = training.triggers.MaxValueTrigger(
        'validation/main/accuracy', (1, 'epoch'))
    # Snapshot model weights only when validation accuracy improves.
    trainer.extend(extensions.snapshot_object(
        model, 'buzzer.npz'), trigger=record_trigger)

    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))
    trainer.extend(extensions.ProgressBar())

    if not os.path.isdir(model.model_dir):
        os.mkdir(model.model_dir)

    # Run the training
    trainer.run()
def test_on_error(self):
    """snapshot_on_error=True writes a snapshot only when training fails."""
    class _Boom(Exception):
        pass

    @training.make_extension(trigger=(1, 'iteration'), priority=100)
    def raise_boom(trainer):
        raise _Boom()

    self.trainer.extend(raise_boom)
    ext = extensions.snapshot_object(
        self.trainer, self.filename, snapshot_on_error=True)
    self.trainer.extend(ext)

    # Nothing is written before the run.
    self.assertFalse(os.path.exists(self.filename))
    with self.assertRaises(_Boom):
        self.trainer.run()
    # The failing run must have left a snapshot behind.
    self.assertTrue(os.path.exists(self.filename))
def train(num_loop):
    """Train the CNN on MNIST for ``num_loop`` epochs on GPU 0, saving the
    model weights to 'result/cnn_<epoch>.npz' every epoch."""
    chainer.cuda.get_device_from_id(0).use()
    model = CnnModel()
    model.to_gpu()
    optimizer = optimizers.Adam()
    optimizer.setup(model)
    minibatch_size = 1000
    # ndim=3 keeps images as (1, 28, 28) arrays for the convolutional net.
    train, test = datasets.get_mnist(ndim=3)
    iterator = iterators.SerialIterator(train, minibatch_size)
    updater = training.StandardUpdater(iterator, optimizer, device=0)
    loops = (num_loop, 'epoch')
    if not os.path.exists('result'):
        os.mkdir('result')
    trainer = training.Trainer(updater, loops, out='result')
    trainer.extend(extensions.ProgressBar())
    # Save the model weights at the end of every epoch.
    trainer.extend(extensions.snapshot_object(
        model, 'cnn_{.updater.epoch}.npz'), trigger=(1, 'epoch'))
    print('start to train')
    trainer.run()
    print('finish to train')
def run(datasetPath, resultPath, modelPath="", resumePath=""):
    """Train an Alex classifier on a VOC-style dataset.

    datasetPath may be a single path (str) or a list of paths; modelPath
    and resumePath, when pointing at existing files, preload model weights
    and resume full trainer state respectively.
    """
    # set dataset
    if isinstance(datasetPath, str):
        ds = datasetVOC(datasetPath, 32)
    elif isinstance(datasetPath, list):
        ds = datasetVOCs(datasetPath, 32)
    else:
        raise Exception("データセットパスの型が不正です。")
    train, test = ds.getDataset()
    # set model
    model = chainer.links.Classifier(Alex())
    if os.path.isfile(modelPath):
        chainer.serializers.load_npz(modelPath, model)
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    # set evaluation model: a copy run in inference mode.
    eval_model = model.copy()
    eval_model.train = False
    # train and test
    train_iter = chainer.iterators.SerialIterator(train, BATCH_SIZE)
    test_iter = chainer.iterators.SerialIterator(
        test, BATCH_SIZE, repeat=False, shuffle=False)
    updater = chainer.training.StandardUpdater(train_iter, optimizer, device=-1)
    trainer = chainer.training.Trainer(updater, (EPOCH, "epoch"), out=resultPath)
    trainer.extend(extensions.Evaluator(test_iter, eval_model, device=-1))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport(
            ["epoch", "main/loss", "validation/main/loss",
             "main/accuracy", "validation/main/accuracy"]
        )
    )
    trainer.extend(extensions.ProgressBar(update_interval=5))
    # Trainer state (for resume) and bare model weights, per epoch.
    trainer.extend(extensions.snapshot(filename="snapshot_epoch_{.updater.epoch}"))
    trainer.extend(extensions.snapshot_object(model, filename="model_epoch_{.updater.epoch}"))
    trainer.extend(extensions.dump_graph("main/loss"))
    if os.path.isfile(resumePath):
        chainer.serializers.load_npz(resumePath, trainer)
    trainer.run()
def test_save_file(self):
    """snapshot_object must write the target file when invoked."""
    ext = extensions.snapshot_object(self.trainer, 'myfile.dat')
    ext(self.trainer)
    self.assertTrue(os.path.exists('myfile.dat'))
model.rcnn_train = True # optimizer = optimizers.Adam() # optimizer.setup(model) optimizer = optimizers.MomentumSGD(lr=0.001) optimizer.setup(model) optimizer.add_hook(chainer.optimizer.WeightDecay(0.0005)) updater = training.StandardUpdater(train_iter, optimizer, device=0) trainer = training.Trainer(updater, (100, 'epoch'), out='train_rcnn') trainer.extend(extensions.LogReport(trigger=(100, 'iteration'))) trainer.extend(extensions.PrintReport([ 'epoch', 'iteration', 'main/loss_cls', 'main/cls_accuracy', 'main/loss_bbox', 'main/loss_rcnn', 'elapsed_time', ]), trigger=(100, 'iteration')) trainer.extend( extensions.snapshot_object(model, 'snapshot_{.updater.iteration}'), trigger=(1000, 'iteration')) trainer.extend(extensions.PlotReport(['main/loss_rcnn'], trigger=(100, 'iteration'))) trainer.extend(extensions.PlotReport(['main/cls_accuracy'], trigger=(100, 'iteration'))) trainer.extend( extensions.dump_graph('main/loss_rcnn', out_name='loss_rcnn.dot')) trainer.run()
def main():
    """ChainerMN DCGAN training entry point: parse CLI options, set up a
    communicator, build the models/optimizers, scatter the dataset from
    worker 0, and run the trainer (rank 0 owns all reporting/snapshots)."""
    parser = argparse.ArgumentParser(description='ChainerMN example: DCGAN')
    parser.add_argument('--batchsize', '-b', type=int, default=50,
                        help='Number of images in each mini-batch')
    parser.add_argument('--communicator', type=str,
                        default='hierarchical', help='Type of communicator')
    parser.add_argument('--epoch', '-e', type=int, default=1000,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', action='store_true',
                        help='Use GPU')
    parser.add_argument('--dataset', '-i', default='',
                        help='Directory of image files. Default is cifar-10.')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--gen_model', '-r', default='',
                        help='Use pre-trained generator for training')
    parser.add_argument('--dis_model', '-d', default='',
                        help='Use pre-trained discriminator for training')
    parser.add_argument('--n_hidden', '-n', type=int, default=100,
                        help='Number of hidden units (z)')
    parser.add_argument('--seed', type=int, default=0,
                        help='Random seed of z at visualization stage')
    parser.add_argument('--snapshot_interval', type=int, default=1000,
                        help='Interval of snapshot')
    parser.add_argument('--display_interval', type=int, default=100,
                        help='Interval of displaying log to console')
    args = parser.parse_args()

    # Prepare ChainerMN communicator.
    if args.gpu:
        if args.communicator == 'naive':
            print("Error: 'naive' communicator does not support GPU.\n")
            exit(-1)
        comm = chainermn.create_communicator(args.communicator)
        # One GPU per process: use the intra-node rank as the device id.
        device = comm.intra_rank
    else:
        if args.communicator != 'naive':
            print('Warning: using naive communicator '
                  'because only naive supports CPU-only execution')
        comm = chainermn.create_communicator('naive')
        device = -1

    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        if args.gpu:
            print('Using GPUs')
        print('Using {} communicator'.format(args.communicator))
        print('Num hidden unit: {}'.format(args.n_hidden))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    # Set up a neural network to train
    gen = Generator(n_hidden=args.n_hidden)
    dis = Discriminator()

    if device >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(device).use()
        gen.to_gpu()  # Copy the model to the GPU
        dis.to_gpu()

    # Setup an optimizer
    def make_optimizer(model, comm, alpha=0.0002, beta1=0.5):
        # Create a multi node optimizer from a standard Chainer optimizer.
        optimizer = chainermn.create_multi_node_optimizer(
            chainer.optimizers.Adam(alpha=alpha, beta1=beta1), comm)
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001), 'hook_dec')
        return optimizer

    opt_gen = make_optimizer(gen, comm)
    opt_dis = make_optimizer(dis, comm)

    # Split and distribute the dataset. Only worker 0 loads the whole dataset.
    # Datasets of worker 0 are evenly split and distributed to all workers.
    if comm.rank == 0:
        if args.dataset == '':
            # Load the CIFAR10 dataset if args.dataset is not specified
            train, _ = chainer.datasets.get_cifar10(withlabel=False,
                                                    scale=255.)
        else:
            all_files = os.listdir(args.dataset)
            image_files = [f for f in all_files if ('png' in f or 'jpg' in f)]
            print('{} contains {} image files'
                  .format(args.dataset, len(image_files)))
            train = chainer.datasets\
                .ImageDataset(paths=image_files, root=args.dataset)
    else:
        train = None
    train = chainermn.scatter_dataset(train, comm)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)

    # Set up a trainer
    updater = DCGANUpdater(
        models=(gen, dis),
        iterator=train_iter,
        optimizer={
            'gen': opt_gen, 'dis': opt_dis},
        device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Some display and output extensions are necessary only for one worker.
    # (Otherwise, there would just be repeated outputs.)
    if comm.rank == 0:
        snapshot_interval = (args.snapshot_interval, 'iteration')
        display_interval = (args.display_interval, 'iteration')
        # Save only model parameters.
        # `snapshot` extension will save all the trainer module's attribute,
        # including `train_iter`.
        # However, `train_iter` depends on scattered dataset, which means that
        # `train_iter` may be different in each process.
        # Here, instead of saving whole trainer module, only the network models
        # are saved.
        trainer.extend(extensions.snapshot_object(
            gen, 'gen_iter_{.updater.iteration}.npz'),
            trigger=snapshot_interval)
        trainer.extend(extensions.snapshot_object(
            dis, 'dis_iter_{.updater.iteration}.npz'),
            trigger=snapshot_interval)
        trainer.extend(extensions.LogReport(trigger=display_interval))
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'gen/loss', 'dis/loss', 'elapsed_time',
        ]), trigger=display_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))
        trainer.extend(
            out_generated_image(
                gen, dis,
                10, 10, args.seed, args.out),
            trigger=snapshot_interval)

    # Start the training using pre-trained model, saved by snapshot_object
    if args.gen_model:
        chainer.serializers.load_npz(args.gen_model, gen)
    if args.dis_model:
        chainer.serializers.load_npz(args.dis_model, dis)

    # Run the training
    trainer.run()
def train(args):
    '''Run training'''
    # seed setting
    torch.manual_seed(args.seed)

    # debug mode setting
    # 0 would be fastest, but 1 seems to be reasonable
    # by considering reproducability
    # revmoe type check
    if args.debugmode < 2:
        chainer.config.type_check = False
        logging.info('torch type check is disabled')
    # use determinisitic computation or not
    if args.debugmode < 1:
        torch.backends.cudnn.deterministic = False
        logging.info('torch cudnn deterministic is disabled')
    else:
        torch.backends.cudnn.deterministic = True

    # check cuda availability
    if not torch.cuda.is_available():
        logging.warning('cuda is not available')

    # get input and output dimension info from the first validation utterance
    with open(args.valid_label, 'rb') as f:
        valid_json = json.load(f)['utts']
    utts = list(valid_json.keys())
    idim = int(valid_json[utts[0]]['idim'])
    odim = int(valid_json[utts[0]]['odim'])
    logging.info('#input dims : ' + str(idim))
    logging.info('#output dims: ' + str(odim))

    # specify model architecture
    e2e = E2E(idim, odim, args)
    model = Loss(e2e, args.mtlalpha)

    # write model config
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    model_conf = args.outdir + '/model.conf'
    with open(model_conf, 'wb') as f:
        logging.info('writing a model config file to' + model_conf)
        # TODO(watanabe) use others than pickle, possibly json, and save as a text
        pickle.dump((idim, odim, args), f)
    for key in sorted(vars(args).keys()):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    # Set gpu
    if args.ngpu > 1:
        logging.warn(
            "currently, pytorch does not support multi-gpu. use single gpu.")
    if args.ngpu > 0:
        gpu_id = 0
        # Make a specified GPU current
        model.cuda(gpu_id)  # Copy the model to the GPU
    else:
        gpu_id = -1

    # Setup an optimizer
    if args.opt == 'adadelta':
        optimizer = torch.optim.Adadelta(
            model.parameters(), rho=0.95, eps=args.eps)
    elif args.opt == 'adam':
        optimizer = torch.optim.Adam(model.parameters())

    # FIXME: TOO DIRTY HACK
    # Graft the attributes Chainer's training loop expects onto the torch
    # optimizer so it can serve as the 'main' optimizer of the Trainer.
    setattr(optimizer, "target", model.reporter)
    setattr(optimizer, "serialize", lambda s: model.reporter.serialize(s))

    # read json data
    with open(args.train_label, 'rb') as f:
        train_json = json.load(f)['utts']
    with open(args.valid_label, 'rb') as f:
        valid_json = json.load(f)['utts']

    # make minibatch list (variable length)
    train = make_batchset(train_json, args.batch_size,
                          args.maxlen_in, args.maxlen_out, args.minibatches)
    valid = make_batchset(valid_json, args.batch_size,
                          args.maxlen_in, args.maxlen_out, args.minibatches)
    # hack to make batchsze argument as 1
    # actual bathsize is included in a list
    train_iter = chainer.iterators.SerialIterator(train, 1)
    valid_iter = chainer.iterators.SerialIterator(
        valid, 1, repeat=False, shuffle=False)

    # prepare Kaldi reader
    train_reader = lazy_io.read_dict_scp(args.train_feat)
    valid_reader = lazy_io.read_dict_scp(args.valid_feat)

    # Set up a trainer
    updater = PytorchSeqUpdaterKaldi(
        model, args.grad_clip, train_iter, optimizer, train_reader, gpu_id)
    trainer = training.Trainer(
        updater, (args.epochs, 'epoch'), out=args.outdir)

    # Resume from a snapshot
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)
        model = trainer.updater.model

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(PytorchSeqEvaluaterKaldi(
        model, valid_iter, model.reporter, valid_reader, device=gpu_id))

    # Take a snapshot for each specified epoch
    trainer.extend(extensions.snapshot(), trigger=(1, 'epoch'))

    # Make a plot for training and validation values
    trainer.extend(extensions.PlotReport([
        'main/loss', 'validation/main/loss',
        'main/loss_ctc', 'validation/main/loss_ctc',
        'main/loss_att', 'validation/main/loss_att'
    ], 'epoch', file_name='loss.png'))
    trainer.extend(extensions.PlotReport(
        ['main/acc', 'validation/main/acc'], 'epoch', file_name='acc.png'))

    # Save best models
    def torch_save(path, _):
        # Save both the raw state dict and the pickled module.
        torch.save(model.state_dict(), path)
        torch.save(model, path + ".pkl")

    trainer.extend(extensions.snapshot_object(
        model, 'model.loss.best', savefun=torch_save),
        trigger=training.triggers.MinValueTrigger('validation/main/loss'))
    trainer.extend(extensions.snapshot_object(
        model, 'model.acc.best', savefun=torch_save),
        trigger=training.triggers.MaxValueTrigger('validation/main/acc'))

    # epsilon decay in the optimizer
    def torch_load(path, obj):
        # Restore weights into the live model; ``obj`` is returned unchanged.
        model.load_state_dict(torch.load(path))
        return obj

    if args.opt == 'adadelta':
        # When validation degrades, roll back to the best model and decay eps.
        if args.criterion == 'acc':
            trainer.extend(restore_snapshot(
                model, args.outdir + '/model.acc.best', load_fn=torch_load),
                trigger=CompareValueTrigger(
                    'validation/main/acc',
                    lambda best_value, current_value:
                    best_value > current_value))
            trainer.extend(adadelta_eps_decay(args.eps_decay),
                           trigger=CompareValueTrigger(
                               'validation/main/acc',
                               lambda best_value, current_value:
                               best_value > current_value))
        elif args.criterion == 'loss':
            trainer.extend(restore_snapshot(
                model, args.outdir + '/model.loss.best', load_fn=torch_load),
                trigger=CompareValueTrigger(
                    'validation/main/loss',
                    lambda best_value, current_value:
                    best_value < current_value))
            trainer.extend(adadelta_eps_decay(args.eps_decay),
                           trigger=CompareValueTrigger(
                               'validation/main/loss',
                               lambda best_value, current_value:
                               best_value < current_value))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport(trigger=(100, 'iteration')))
    report_keys = [
        'epoch', 'iteration', 'main/loss', 'main/loss_ctc', 'main/loss_att',
        'validation/main/loss', 'validation/main/loss_ctc',
        'validation/main/loss_att', 'main/acc', 'validation/main/acc',
        'elapsed_time'
    ]
    if args.opt == 'adadelta':
        trainer.extend(extensions.observe_value(
            'eps',
            lambda trainer: trainer.updater.get_optimizer('main').
            param_groups[0]["eps"]),
            trigger=(100, 'iteration'))
        report_keys.append('eps')
    trainer.extend(extensions.PrintReport(report_keys),
                   trigger=(100, 'iteration'))

    trainer.extend(extensions.ProgressBar())

    # Run the training
    trainer.run()
def main():
    """Train the EmbeddingCompressor on a pre-trained embedding matrix,
    keeping the model snapshot with the best validation loss."""
    parser = argparse.ArgumentParser(
        description='Embedding Compressor',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='Number of sentences in each mini-batch')
    parser.add_argument('--iter', '-i', dest='iteration', type=int,
                        default=200000, help='Number of iterations')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--optimizer', '-O', dest='optimizer', type=str,
                        default='Adam', choices=['Adam', 'SGD'],
                        help='Type of optimizer')
    parser.add_argument('--learning-rate', '--lr', dest='learning_rate',
                        type=float, default=0.0001, help='learning rate')
    # BUG FIX: the original wrote ('--M' '-M', ...) — implicit string
    # concatenation registered a single bogus option "--M-M" instead of the
    # intended pair of flags --M / -M.  Same for --K / -K below.
    parser.add_argument('--M', '-M', dest='n_codebooks', type=int, default=32,
                        help='Number of Codebooks')
    parser.add_argument('--K', '-K', dest='n_centroids', type=int, default=16,
                        help='Number of Centroids (Number of vectors in each codebook)')
    parser.add_argument('--tau', dest='tau', type=float, default=1.0,
                        help='Tau value in Gumbel-softmax')
    # Arguments for the dataset / vocabulary path
    parser.add_argument('--input-matrix', dest='input_matrix', required=True,
                        help='path to the matrix (npy)')
    # Random Seed
    parser.add_argument('--seed', default=0, type=int,
                        help='Seed for Random Module')
    # Arguments for directory
    parser.add_argument('--out', '-o', default='./result',
                        type=os.path.abspath,
                        help='Directory to output the result')
    parser.add_argument('--dir-prefix', dest='dir_prefix', default='model',
                        type=str, help='Prefix of the output dir')
    args = parser.parse_args()

    set_random_seed(args.seed, args.gpu)

    # Record run metadata (git/command/python/chainer info) for this run.
    resource = Resource(args, train=True)
    resource.dump_git_info()
    resource.dump_command_info()
    resource.dump_python_info()
    resource.dump_chainer_info()
    resource.save_config_file()

    logger = resource.logger
    dataset = DataProcessor(resource.log_name)
    dataset.load_embed_matrix(args.input_matrix)
    train_data = dataset.load_data('train')
    valid_data = dataset.load_data('dev')

    model = EmbeddingCompressor(
        n_vocab=dataset.embed_matrix.shape[0],
        embed_dim=dataset.embed_matrix.shape[1],
        n_codebooks=args.n_codebooks,
        n_centroids=args.n_centroids,
        tau=args.tau,
        embed_mat=dataset.embed_matrix
    )

    if args.optimizer == 'Adam':
        optimizer = chainer.optimizers.Adam(alpha=args.learning_rate)
    else:
        optimizer = chainer.optimizers.SGD(lr=args.learning_rate)
    optimizer.setup(model)
    logger.info('Optimizer is set to [{}]'.format(args.optimizer))
    # Freeze the pre-trained embedding matrix.
    model.embed_mat.disable_update()  # call this after optimizer.setup()
    logger.info('Updating Embedding Layer is Disabled')

    # Send model to GPU (according to the arguments)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu(args.gpu)

    train_iter = SerialIterator(dataset=train_data,
                                batch_size=args.batchsize, shuffle=True)
    updater = training.updater.StandardUpdater(train_iter, optimizer,
                                               device=args.gpu)
    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               out=resource.output_dir)

    short_term = (1000, 'iteration')
    dev_iter = SerialIterator(valid_data, args.batchsize, repeat=False)
    trainer.extend(
        extensions.Evaluator(dev_iter, model, device=args.gpu),
        trigger=short_term)
    trainer.extend(extensions.ProgressBar(update_interval=1))
    trainer.extend(extensions.LogReport(
        trigger=short_term, log_name='chainer_report_iteration.log'),
        trigger=short_term, name='iteration')
    trainer.extend(extensions.LogReport(
        trigger=short_term, log_name='chainer_report_epoch.log'),
        trigger=short_term, name='epoch')
    # Snapshot (without the frozen embedding) on new best validation loss.
    trainer.extend(extensions.snapshot_object(
        model, 'iter_{.updater.iteration}.npz', savefun=save_non_embed_npz),
        trigger=MinValueTrigger('validation/main/loss', short_term))
    entries = ['epoch', 'iteration', 'main/loss', 'validation/main/loss',
               'main/maxp', 'validation/main/maxp']
    trainer.extend(extensions.PrintReport(entries=entries,
                                          log_report='iteration'),
                   trigger=short_term)
    trainer.extend(extensions.PrintReport(entries=entries,
                                          log_report='epoch'),
                   trigger=short_term)

    logger.info('Start training...')
    trainer.run()
    logger.info('Training complete!!')
    resource.dump_duration()
def main(fi):
    """Train a 2-layer BLSTM classifier on Twitter data, initializing the
    embedding layer from a pre-trained word2vec model.

    Python 2 code: relies on ``unicode`` and ``dict.iteritems``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=20,
                        help='Number of examples in each mini batch')
    parser.add_argument('--epoch', '-e', type=int, default=39,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--gradclip', '-c', type=float, default=5,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.add_argument('--w2v', default='../twitter_model.bin', help='')
    parser.set_defaults(test=False)
    parser.add_argument('--unit', '-u', type=int, default=300,
                        help='Number of LSTM units in each layer')
    args = parser.parse_args()

    # Build the vocabulary from the first tab-separated column of the file.
    with open('data/vocab_dict.txt', "r") as f_dict:
        vocab = set(unicode(l.split('\t')[0]) for l in f_dict)
    vocab_dict = {w: i for i, w in enumerate(vocab)}
    # vocab_dict['<EOS>'] = len(vocab_dict)
    # vocab_dict['<BOS>'] = len(vocab_dict)
    train = get_dataset('data/twitter.train.sort', vocab_dict)
    val = get_dataset('data/twitter.dev', vocab_dict)
    test = get_dataset('data/twitter.test', vocab_dict)
    n_vocab = len(vocab_dict) + 1
    print('#vocab =', n_vocab)

    w2v_model = word2vec.Word2Vec.load(args.w2v)

    train_iter = MyIterator(train, args.batchsize)
    val_iter = MyIterator(val, 1, repeat=False)
    test_iter = MyIterator(test, 1, repeat=False)

    blstm = BLSTM(n_vocab, args.unit, 2)
    model = L.Classifier(blstm)

    # Initialize the embedding rows with pre-trained word2vec vectors
    # where the word is known to the word2vec model.
    for key, index in vocab_dict.iteritems():
        key = unicode(key)
        if key in w2v_model:
            blstm.embed.W.data[index] = w2v_model[key]

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip))

    updater = MyUpdater(train_iter, optimizer, args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluation copy in inference mode; LSTM state reset before each eval.
    eval_model = model.copy()
    eval_model.train = False
    trainer.extend(MyEvaluator(
        val_iter, eval_model, device=args.gpu,
        eval_hook=lambda _: eval_model.predictor.reset_state()), priority=100)
    trainer.extend(extensions.LogReport(trigger=(1, 'epoch')), priority=90)
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'main/accuracy', 'main/validation/accuracy']
    ), trigger=(1, 'epoch'), priority=80)
    trainer.extend(extensions.ProgressBar(
        update_interval=1 if args.test else 10), priority=0)
    trainer.extend(extensions.snapshot())
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter' + '_{.updater.epoch}'))
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()

    # Final evaluation on the held-out test set.
    print('test')
    eval_model.predictor.reset_state()
    evaluator = MyEvaluator(test_iter, eval_model, device=args.gpu)
    result = evaluator()
    print('test accuracy:', result['main/validation/accuracy'])
# NOTE(review): the class header for the two methods below
# (``class MnistModel(...)``) lies outside this chunk; they are indented
# as class members.
    def __init__(self):
        # Three fully-connected layers: 784 -> 100 -> 100 -> 10.
        super(MnistModel, self).__init__(
            l1=L.Linear(784, 100),
            l2=L.Linear(100, 100),
            l3=L.Linear(100, 10))

    def __call__(self, x):
        # Forward pass with ReLU activations; the final layer is left
        # linear (loss/softmax applied by the surrounding Classifier).
        h = F.relu(self.l1(x))
        h = F.relu(self.l2(h))
        return self.l3(h)


def save_pkl(filename, obj):
    # Serialize ``obj`` to ``filename`` with pickle; used below as a
    # custom ``savefun`` for snapshot_object.
    with open(filename, 'wb') as f:
        pickle.dump(obj, f)


# Top-level MNIST training script (CPU, 100 epochs).
model = L.Classifier(MnistModel())
optimizer = chainer.optimizers.Adam()
optimizer.setup(model)

train, test = chainer.datasets.get_mnist()
train_iter = chainer.iterators.SerialIterator(train, 100)
test_iter = chainer.iterators.SerialIterator(test, 100, repeat=False,
                                             shuffle=False)

updater = training.StandardUpdater(train_iter, optimizer, device=-1)
trainer = training.Trainer(updater, (100, 'epoch'), out="result")
trainer.extend(extensions.Evaluator(test_iter, model, device=-1))
trainer.extend(extensions.LogReport())
trainer.extend(extensions.PrintReport(
    ['epoch', 'main/loss', 'validation/main/loss',
     'main/accuracy', 'validation/main/accuracy']))
trainer.extend(extensions.ProgressBar())
# NOTE(review): ``trigger=`` here is passed to snapshot_object itself, not
# to trainer.extend — confirm the Chainer version in use accepts that.
trainer.extend(extensions.snapshot_object(
    model, savefun=save_pkl, filename='model.pkl',
    trigger=MinValueTrigger(key='validation/main/loss')))
trainer.run()
# NOTE(review): this chunk begins mid-call — the opening of the call that
# receives these keyword arguments (presumably the evaluator construction)
# is outside this view.
    device=args.gpu, converter=convert_seq)
trigger = chainer.training.triggers.MaxValueTrigger(
    key='validation/main/accuracy', trigger=(1, 'epoch'))
trainer.extend(evaluator, trigger=(1, 'epoch'))
trainer.extend(extensions.LogReport(
    log_name='log/domain-{0}_case-{1}.log'.format(domain, case)),
    trigger=(1, 'epoch'))
trainer.extend(
    extensions.PrintReport([
        'epoch', 'main/loss', 'main/accuracy', 'validation/main/loss',
        'validation/main/accuracy', 'elapsed_time'
    ]))
# Save model weights whenever validation accuracy reaches a new maximum.
trainer.extend(extensions.snapshot_object(
    model, savefun=serializers.save_npz,
    filename='domain-{0}_case-{1}_epoch-{{.updater.epoch}}.npz'.format(
        domain, case)), trigger=trigger)
trainer.extend(
    extensions.PlotReport(
        ['main/accuracy', 'validation/main/accuracy'],
        file_name='accuracy_domain-{0}_case-{1}.png'.format(domain, case),
        x_key='epoch'))
trainer.extend(
    extensions.PlotReport(['main/loss', 'validation/main/loss'],
                          file_name='loss_domain-{0}_case-{1}.png'.format(
                              domain, case),
                          x_key='epoch'))
trainer.extend(extensions.ProgressBar(update_interval=10))
trainer.run()
def train_model():
    """Train a UNet segmentation model on a labeled image dataset and
    mirror the training log to TensorBoard."""
    parser = argparse.ArgumentParser()
    parser.add_argument('dataset',
                        help='Path to directory containing train.txt, val.txt, and mean.npy')
    parser.add_argument('images', help='Root directory of input images')
    parser.add_argument('labels', help='Root directory of label images')
    parser.add_argument('--batchsize', '-b', type=int, default=16,
                        help='Number of images in each mini-batch')
    parser.add_argument('--test-batchsize', '-B', type=int, default=4,
                        help='Number of images in each test mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=50,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--frequency', '-f', type=int, default=1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='logs',
                        help='Directory to output the result under "models" directory')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--noplot', dest='plot', action='store_false',
                        help='Disable PlotReport extension')
    parser.add_argument('--tcrop', type=int, default=400,
                        help='Crop size for train-set images')
    parser.add_argument('--vcrop', type=int, default=480,
                        help='Crop size for validation-set images')
    args = parser.parse_args()

    assert (args.tcrop % 16 == 0) and (args.vcrop % 16 == 0), "tcrop and vcrop must be divisible by 16."

    # Pick the TensorBoard logger implementation matching the device.
    if args.gpu < 0:
        from tboard_logger_cpu import TensorboardLogger
    else:
        from tboard_logger import TensorboardLogger

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# Crop-size: {}'.format(args.tcrop))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # All outputs live under <repo>/models/<out>.
    this_dir = os.path.dirname(os.path.abspath(__file__))
    models_dir = os.path.normpath(os.path.join(this_dir, "../../models"))
    log_dir = os.path.join(models_dir, args.out)
    writer = SummaryWriter(log_dir=log_dir)

    # Set up a neural network to train
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    model = UNet()
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Load mean image
    mean = np.load(os.path.join(args.dataset, "mean.npy"))

    # Load the MNIST dataset
    train = LabeledImageDataset(os.path.join(args.dataset, "train.txt"),
                                args.images, args.labels, mean=mean,
                                crop_size=args.tcrop, test=False,
                                distort=False)
    test = LabeledImageDataset(os.path.join(args.dataset, "val.txt"),
                               args.images, args.labels, mean=mean,
                               crop_size=args.vcrop, test=True,
                               distort=False)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.test_batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(
        train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=log_dir)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot for each specified epoch
    frequency = args.epoch if args.frequency == -1 else max(1, args.frequency)
    trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))

    # Save trained model for each specific epoch
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
        trigger=(frequency, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Save two plot images to the result dir
    if args.plot and extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch', file_name='accuracy.png'))

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    # Write training log to TensorBoard log file
    trainer.extend(TensorboardLogger(writer,
                                     ['main/loss', 'validation/main/loss',
                                      'main/accuracy',
                                      'validation/main/accuracy']))

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
def main():
    """Train a Faster R-CNN (VGG16 backbone) on PASCAL VOC 2007 trainval.

    Parses CLI options, builds the model/optimizer/iterator, registers
    trainer extensions (snapshot, LR shift, logging, plotting) and runs
    training for ``--iteration`` iterations.
    """
    parser = argparse.ArgumentParser(
        description='Faster R-CNN Chainer version')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--lr', '-l', type=float, default=1e-3)
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--step_size', '-ss', type=int, default=50000)
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    args = parser.parse_args()

    np.random.seed(args.seed)

    train_data = VOCDataset(split='trainval', year='2007')
    # Test-set evaluation is currently disabled (kept for reference).
    # test_data = VOCDataset(split='test', year='2007',
    #                        use_difficult=True, return_difficult=True)
    faster_rcnn = FasterRCNNVGG16(n_fg_class=len(voc_detection_label_names),
                                  pretrained_model='imagenet')
    faster_rcnn.use_preset('evaluate')
    model = FasterRCNNTrainChain(faster_rcnn)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()
    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    # Standard weight decay used in the Faster R-CNN paper's training recipe.
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    train_data = TransformDataset(train_data, Transform(faster_rcnn))

    train_iter = chainer.iterators.MultiprocessIterator(train_data,
                                                        batch_size=1,
                                                        n_processes=None,
                                                        shared_mem=100000000)
    # test_iter = chainer.iterators.SerialIterator(
    #     test_data, batch_size=1, repeat=False, shuffle=False)
    updater = chainer.training.updater.StandardUpdater(train_iter, optimizer,
                                                       device=args.gpu)

    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               out=args.out)

    # Save only the detector weights (not the whole train chain), once at the end.
    trainer.extend(extensions.snapshot_object(model.faster_rcnn,
                                              'snapshot_model.npz'),
                   trigger=(args.iteration, 'iteration'))
    # Decay learning rate by 10x at --step_size iterations.
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=(args.step_size, 'iteration'))

    log_interval = 20, 'iteration'
    plot_interval = 3000, 'iteration'
    print_interval = 5, 'iteration'

    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=log_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'iteration', 'epoch', 'elapsed_time', 'lr',
        'main/loss',
        'main/roi_loc_loss',
        'main/roi_cls_loss',
        'main/rpn_loc_loss',
        'main/rpn_cls_loss',
        'validation/main/map',
    ]), trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if extensions.PlotReport.available():
        trainer.extend(extensions.PlotReport(['main/loss'],
                                             file_name='loss.png',
                                             trigger=plot_interval),
                       trigger=plot_interval)

    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def main():
    """Train a CNN classifier on MNIST and save the best/final models.

    Saves the best model (by validation accuracy) as ``mnist-cnn-best``
    during training, and the final model as ``mnist-cnn.npz`` afterwards.
    """
    # Read command-line arguments
    parser = argparse.ArgumentParser(description='Chainer MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=20,
                        help='Batch size')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Epoch')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID')
    parser.add_argument('--out', '-o', default='result',
                        help='output directory')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # reading model
    model = L.Classifier(CNN(), lossfun=F.softmax_cross_entropy)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # adam optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # loading MNIST dataset; ndim=3 keeps the (channel, H, W) image shape for the CNN
    train, test = chainer.datasets.get_mnist(ndim=3)

    # Iterator of dataset with Batchsize
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)  # for training
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # updater/trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # setup evaluator
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))

    # plotting mnist-cnn network
    trainer.extend(extensions.dump_graph('main/loss'))

    # Reporting
    # setup log
    trainer.extend(extensions.LogReport())
    # progress plot
    if extensions.PlotReport.available():
        trainer.extend(extensions.PlotReport(
            ['main/loss', 'validation/main/loss'],
            'epoch', file_name='loss.png')
        )
        trainer.extend(extensions.PlotReport(
            ['main/accuracy', 'validation/main/accuracy'],
            'epoch', file_name='accuracy.png')
        )
    # progress console
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time'])
    )

    # Saving at updated test-accuracy: snapshot fires whenever the
    # validation accuracy reaches a new maximum (checked every epoch).
    trigger = triggers.MaxValueTrigger('validation/main/accuracy',
                                       trigger=(1, 'epoch'))
    trainer.extend(extensions.snapshot_object(model, filename='mnist-cnn-best'),
                   trigger=trigger)

    # progress bar
    trainer.extend(extensions.ProgressBar())

    # Training
    trainer.run()

    # Saving model final
    serializers.save_npz('mnist-cnn.npz', model)
def create_trainer(
        config: TrainConfig,
        project_path: str,
        updater,
        model: typing.Dict,
        eval_func,
        iterator_test,
        iterator_train_eval,
        loss_names,
        converter=chainer.dataset.convert.concat_examples,
        log_name='log.txt',
):
    """Build a Chainer Trainer for the encoder/generator/mismatch-discriminator setup.

    Registers per-model weight snapshots, a graph dump, test/train evaluators,
    and log/print reporting.

    Args:
        config: provides ``log_iteration``, ``save_iteration`` and ``gpu``.
        project_path: output directory for the trainer.
        updater: a Chainer updater driving the optimizers.
        model: dict with keys ``'encoder'``, ``'generator'``,
            ``'mismatch_discriminator'`` mapping to the trained links.
        eval_func: evaluation function passed to the evaluator.
        iterator_test / iterator_train_eval: iterators for evaluation.
        loss_names: reported loss keys; ``loss_names[0]`` is used for dump_graph.
        converter: batch converter (defaults to ``concat_examples``).
        log_name: filename of the log written by LogReport.

    Returns:
        The configured ``chainer.training.Trainer``.

    NOTE(review): no stop trigger is passed to ``Trainer`` — presumably the
    caller stops training externally; confirm this is intentional.
    """
    trainer = chainer.training.Trainer(updater, out=project_path)

    log_trigger = (config.log_iteration, 'iteration')
    save_trigger = (config.save_iteration, 'iteration')

    eval_test_name = 'eval/test'
    eval_train_name = 'eval/train'

    # One snapshot extension per sub-model so each can be reloaded separately.
    snapshot = extensions.snapshot_object(model['encoder'],
                                          'encoder{.updater.iteration}.model')
    trainer.extend(snapshot, trigger=save_trigger)
    snapshot = extensions.snapshot_object(
        model['generator'], 'generator{.updater.iteration}.model')
    trainer.extend(snapshot, trigger=save_trigger)
    snapshot = extensions.snapshot_object(
        model['mismatch_discriminator'],
        'mismatch_discriminator{.updater.iteration}.model')
    trainer.extend(snapshot, trigger=save_trigger)

    trainer.extend(
        utility.chainer.dump_graph([
            'encoder/' + loss_names[0],
            'generator/' + loss_names[0],
            'mismatch_discriminator/' + loss_names[0],
        ], out_name='main.dot'))

    def _make_evaluator(iterator):
        # Evaluator over the whole model dict; runs eval_func on config.gpu.
        return utility.chainer.NoVariableEvaluator(
            iterator,
            target=model,
            converter=converter,
            eval_func=eval_func,
            device=config.gpu,
        )

    trainer.extend(_make_evaluator(iterator_test),
                   name=eval_test_name, trigger=log_trigger)
    trainer.extend(_make_evaluator(iterator_train_eval),
                   name=eval_train_name, trigger=log_trigger)

    # Report every (prefix, model, loss) combination: main run + both evaluators.
    report_target = []
    for evaluator_name in ['', eval_test_name + '/', eval_train_name + '/']:
        for model_name in [s + '/' for s in model.keys()]:
            for loss_name in set(loss_names):
                report_target.append(evaluator_name + model_name + loss_name)

    trainer.extend(extensions.LogReport(trigger=log_trigger, log_name=log_name))
    trainer.extend(extensions.PrintReport(report_target))

    return trainer
def train(args):
    """Run hybrid CTC/attention ASR training (PyTorch model, Chainer trainer).

    Builds the E2E model (optionally with an adversarial branch), sets up an
    Adadelta/Adam optimizer with separate ASR/adversarial learning rates,
    wires up evaluation, snapshotting, best-model tracking and epsilon decay,
    then runs the trainer.

    Fix: ``mtl_mode is not 'ctc'`` compared a string by identity — an
    implementation detail of string interning and a SyntaxWarning since
    CPython 3.8 — replaced with the equality test ``mtl_mode != 'ctc'``.
    """
    # seed setting
    torch.manual_seed(args.seed)

    # debug mode setting
    # 0 would be fastest, but 1 seems to be reasonable
    # by considering reproducibility
    # remove type check
    if args.debugmode < 2:
        chainer.config.type_check = False
        logging.info('torch type check is disabled')
    # use deterministic computation or not
    if args.debugmode < 1:
        torch.backends.cudnn.deterministic = False
        logging.info('torch cudnn deterministic is disabled')
    else:
        torch.backends.cudnn.deterministic = True

    # check cuda availability
    if not torch.cuda.is_available():
        logging.warning('cuda is not available')

    # get input and output dimension info from the first validation utterance
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']
    utts = list(valid_json.keys())
    idim = int(valid_json[utts[0]]['input'][0]['shape'][1])
    odim = int(valid_json[utts[0]]['output'][0]['shape'][1])
    logging.info('#input dims : ' + str(idim))
    logging.info('#output dims: ' + str(odim))
    odim_adv = None
    if args.adv:
        # second output stream carries the adversarial target dimension
        odim_adv = int(valid_json[utts[0]]['output'][1]['shape'][1])
        logging.info('#output dims adversarial: ' + str(odim_adv))

    # specify attention, CTC, hybrid mode
    if args.mtlalpha == 1.0:
        mtl_mode = 'ctc'
        logging.info('Pure CTC mode')
    elif args.mtlalpha == 0.0:
        mtl_mode = 'att'
        logging.info('Pure attention mode')
    else:
        mtl_mode = 'mtl'
        logging.info('Multitask learning mode')

    # specify model architecture
    e2e = E2E(idim, odim, args, odim_adv=odim_adv)
    model = Loss(e2e, args.mtlalpha)

    if args.rnnlm is not None:
        # attach a pretrained RNN language model for decoding
        rnnlm_args = get_model_conf(args.rnnlm, args.rnnlm_conf)
        rnnlm = lm_pytorch.ClassifierWithState(
            lm_pytorch.RNNLM(len(args.char_list),
                             rnnlm_args.layer,
                             rnnlm_args.unit))
        torch_load(args.rnnlm, rnnlm)
        e2e.rnnlm = rnnlm

    # write model config
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    model_conf = args.outdir + '/model.json'
    with open(model_conf, 'wb') as f:
        logging.info('writing a model config file to ' + model_conf)
        f.write(
            json.dumps((idim, odim, odim_adv, vars(args)),
                       indent=4, sort_keys=True).encode('utf_8'))
    for key in sorted(vars(args).keys()):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    reporter = model.reporter

    # check the use of multi-gpu
    if args.ngpu > 1:
        model = torch.nn.DataParallel(model, device_ids=list(range(args.ngpu)))
        logging.info('batch size is automatically increased (%d -> %d)' %
                     (args.batch_size, args.batch_size * args.ngpu))
        args.batch_size *= args.ngpu

    # set torch device
    device = torch.device("cuda" if args.ngpu > 0 else "cpu")
    model = model.to(device)

    # Setup an optimizer
    # First distinguish between learning rates: encoder/decoder use the ASR
    # learning rate, the adversarial branch its own rate.
    if args.ngpu > 1:
        param_grp = [{
            'params': model.module.predictor.enc.parameters(),
            'lr': args.asr_lr
        }, {
            'params': model.module.predictor.dec.parameters(),
            'lr': args.asr_lr
        }, {
            'params': model.module.predictor.adv.parameters(),
            'lr': args.adv_lr
        }]
    else:
        param_grp = [{
            'params': model.predictor.enc.parameters(),
            'lr': args.asr_lr
        }, {
            'params': model.predictor.dec.parameters(),
            'lr': args.asr_lr
        }, {
            'params': model.predictor.adv.parameters(),
            'lr': args.adv_lr
        }]
    if args.opt == 'adadelta':
        optimizer = torch.optim.Adadelta(param_grp, rho=0.95, eps=args.eps)
    elif args.opt == 'adam':
        optimizer = torch.optim.Adam(param_grp)

    # FIXME: TOO DIRTY HACK
    # The Chainer trainer expects the optimizer to expose target/serialize.
    setattr(optimizer, "target", reporter)
    setattr(optimizer, "serialize", lambda s: reporter.serialize(s))

    # Setup a converter
    converter = CustomConverter(e2e.subsample[0])

    # read json data
    with open(args.train_json, 'rb') as f:
        train_json = json.load(f)['utts']
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']

    # make minibatch list (variable length)
    train = make_batchset(train_json, args.batch_size,
                          args.maxlen_in, args.maxlen_out, args.minibatches,
                          min_batch_size=args.ngpu if args.ngpu > 1 else 1)
    valid = make_batchset(valid_json, args.batch_size,
                          args.maxlen_in, args.maxlen_out, args.minibatches,
                          min_batch_size=args.ngpu if args.ngpu > 1 else 1)
    # hack to make batchsize argument as 1
    # actual batchsize is included in a list
    if args.n_iter_processes > 0:
        train_iter = chainer.iterators.MultiprocessIterator(
            TransformDataset(train, converter.transform),
            batch_size=1, n_processes=args.n_iter_processes,
            n_prefetch=8, maxtasksperchild=20)
        valid_iter = chainer.iterators.MultiprocessIterator(
            TransformDataset(valid, converter.transform),
            batch_size=1, repeat=False, shuffle=False,
            n_processes=args.n_iter_processes,
            n_prefetch=8, maxtasksperchild=20)
    else:
        train_iter = chainer.iterators.SerialIterator(
            TransformDataset(train, converter.transform), batch_size=1)
        valid_iter = chainer.iterators.SerialIterator(
            TransformDataset(valid, converter.transform),
            batch_size=1, repeat=False, shuffle=False)

    # Prepare adversarial training schedule dictionary
    adv_schedule = get_advsched(args.adv, args.epochs)

    # Set up a trainer
    updater = CustomUpdater(model, args.grad_clip, train_iter, optimizer,
                            converter, device, args.ngpu,
                            adv_schedule=adv_schedule,
                            max_grlalpha=args.grlalpha)
    trainer = training.Trainer(updater, (args.epochs, 'epoch'),
                               out=args.outdir)

    # Resume from a snapshot
    if args.resume:
        logging.info('resumed from %s' % args.resume)
        # torch_resume(args.resume, trainer, weight_sharing=args.weight_sharing)
        torch_resume(args.resume, trainer,
                     weight_sharing=args.weight_sharing,
                     reinit_adv=args.reinit_adv)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(
        CustomEvaluator(model, valid_iter, reporter, converter, device))

    # Save attention weight each epoch (not meaningful in pure-CTC mode)
    if args.num_save_attention > 0 and args.mtlalpha != 1.0:
        data = sorted(list(valid_json.items())[:args.num_save_attention],
                      key=lambda x: int(x[1]['input'][0]['shape'][1]),
                      reverse=True)
        if hasattr(model, "module"):
            att_vis_fn = model.module.predictor.calculate_all_attentions
        else:
            att_vis_fn = model.predictor.calculate_all_attentions
        trainer.extend(PlotAttentionReport(att_vis_fn, data,
                                           args.outdir + "/att_ws",
                                           converter=converter,
                                           device=device),
                       trigger=(1, 'epoch'))

    # Make a plot for training and validation values
    trainer.extend(
        extensions.PlotReport([
            'main/loss', 'validation/main/loss', 'main/loss_ctc',
            'validation/main/loss_ctc', 'main/loss_att',
            'validation/main/loss_att', 'main/loss_adv',
            'validation/main/loss_adv'
        ], 'epoch', file_name='loss.png'))
    trainer.extend(
        extensions.PlotReport([
            'main/acc', 'validation/main/acc', 'main/acc_adv',
            'validation/main/acc_adv'
        ], 'epoch', file_name='acc.png'))

    # Save best models
    trainer.extend(
        extensions.snapshot_object(model, 'model.loss.best',
                                   savefun=torch_save),
        trigger=training.triggers.MinValueTrigger('validation/main/loss'))
    # Accuracy is only reported when an attention branch exists
    # (was `mtl_mode is not 'ctc'` — identity compare on a literal).
    if mtl_mode != 'ctc':
        trainer.extend(
            extensions.snapshot_object(model, 'model.acc.best',
                                       savefun=torch_save),
            trigger=training.triggers.MaxValueTrigger('validation/main/acc'))

    # save snapshot which contains model and optimizer states
    trainer.extend(torch_snapshot(), trigger=(1, 'epoch'))

    # epsilon decay in the optimizer: when the tracked metric stops improving,
    # restore the best model and shrink Adadelta's eps.
    if args.opt == 'adadelta':
        if args.criterion == 'acc' and mtl_mode != 'ctc':
            trainer.extend(restore_snapshot(model,
                                            args.outdir + '/model.acc.best',
                                            load_fn=torch_load),
                           trigger=CompareValueTrigger(
                               'validation/main/acc',
                               lambda best_value, current_value:
                               best_value > current_value))
            trainer.extend(adadelta_eps_decay(args.eps_decay),
                           trigger=CompareValueTrigger(
                               'validation/main/acc',
                               lambda best_value, current_value:
                               best_value > current_value))
        elif args.criterion == 'loss':
            trainer.extend(restore_snapshot(model,
                                            args.outdir + '/model.loss.best',
                                            load_fn=torch_load),
                           trigger=CompareValueTrigger(
                               'validation/main/loss',
                               lambda best_value, current_value:
                               best_value < current_value))
            trainer.extend(adadelta_eps_decay(args.eps_decay),
                           trigger=CompareValueTrigger(
                               'validation/main/loss',
                               lambda best_value, current_value:
                               best_value < current_value))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport(trigger=(REPORT_INTERVAL,
                                                 'iteration')))
    report_keys = [
        'epoch', 'iteration', 'main/loss', 'main/loss_ctc', 'main/loss_att',
        'validation/main/loss', 'validation/main/loss_ctc',
        'validation/main/loss_att', 'main/acc', 'validation/main/acc',
        'elapsed_time'
    ]
    if args.opt == 'adadelta':
        trainer.extend(extensions.observe_value(
            'eps', lambda trainer: trainer.updater.get_optimizer('main').
            param_groups[0]["eps"]),
            trigger=(REPORT_INTERVAL, 'iteration'))
        report_keys.append('eps')
    if args.report_cer:
        report_keys.append('validation/main/cer')
    if args.report_wer:
        report_keys.append('validation/main/wer')
    if args.adv:
        report_keys.extend([
            'main/loss_adv', 'main/acc_adv', 'validation/main/loss_adv',
            'validation/main/acc_adv'
        ])
    trainer.extend(extensions.PrintReport(report_keys),
                   trigger=(REPORT_INTERVAL, 'iteration'))

    trainer.extend(extensions.ProgressBar(update_interval=REPORT_INTERVAL))

    # Run the training
    trainer.run()
model, converter=convert_rsd_batch), trigger=parse_trigger(args.log_trigger)) trainer.extend( extensions.LogReport(trigger=parse_trigger(args.log_trigger))) trainer.extend( extensions.PlotReport(['main/loss', 'validation/main/loss'], file_name=args.plot_loss_file, trigger=parse_trigger(args.log_trigger))) trainer.extend( extensions.PlotReport(['main/f1', 'validation/main/f1'], file_name=args.plot_f1_file, trigger=parse_trigger(args.log_trigger))) trainer.extend( extensions.PlotReport(['main/recall', 'validation/main/recall'], file_name=args.plot_recall_file, trigger=parse_trigger(args.log_trigger))) trainer.extend( extensions.PlotReport(['main/precision', 'validation/main/precision'], file_name=args.plot_precision_file, trigger=parse_trigger(args.log_trigger))) trainer.extend(extensions.PrintReport([ 'epoch', 'iteration', 'main/loss', 'validation/main/loss', 'main/f1', 'main/precision', 'main/recall' ]), trigger=parse_trigger(args.print_trigger)) trainer.extend(extensions.snapshot_object(model, args.model_file), trigger=parse_trigger(args.store_model_trigger)) trainer.run()
def main():
    """Train Faster R-CNN (VGG16) with ChainerCV on VOC07 or VOC07+12.

    Registers snapshotting, LR decay, logging/plotting and a VOC-metric
    evaluator (run at the LR-step and at the final iteration), then trains
    for ``--iteration`` iterations.
    """
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: Faster R-CNN')
    parser.add_argument('--dataset', choices=('voc07', 'voc0712'),
                        help='The dataset to use: VOC07, VOC07+12',
                        default='voc07')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--lr', '-l', type=float, default=1e-3)
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--step_size', '-ss', type=int, default=50000)
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    args = parser.parse_args()

    np.random.seed(args.seed)

    if args.dataset == 'voc07':
        train_data = VOCBboxDataset(split='trainval', year='2007')
    elif args.dataset == 'voc0712':
        # VOC07+12: concatenate both trainval splits.
        train_data = ConcatenatedDataset(
            VOCBboxDataset(year='2007', split='trainval'),
            VOCBboxDataset(year='2012', split='trainval'))
    test_data = VOCBboxDataset(split='test', year='2007',
                               use_difficult=True, return_difficult=True)
    faster_rcnn = FasterRCNNVGG16(n_fg_class=len(voc_bbox_label_names),
                                  pretrained_model='imagenet')
    faster_rcnn.use_preset('evaluate')
    model = FasterRCNNTrainChain(faster_rcnn)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()
    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    train_data = TransformDataset(train_data, Transform(faster_rcnn))

    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, batch_size=1, n_processes=None, shared_mem=100000000)
    test_iter = chainer.iterators.SerialIterator(
        test_data, batch_size=1, repeat=False, shuffle=False)
    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, device=args.gpu)

    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), out=args.out)

    # Save only the detector weights at the end of training.
    trainer.extend(
        extensions.snapshot_object(model.faster_rcnn, 'snapshot_model.npz'),
        trigger=(args.iteration, 'iteration'))
    # 10x LR decay at --step_size iterations.
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=(args.step_size, 'iteration'))

    log_interval = 20, 'iteration'
    plot_interval = 3000, 'iteration'
    print_interval = 20, 'iteration'

    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=log_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport(
        ['iteration', 'epoch', 'elapsed_time', 'lr',
         'main/loss',
         'main/roi_loc_loss',
         'main/roi_cls_loss',
         'main/rpn_loc_loss',
         'main/rpn_cls_loss',
         'validation/main/map',
         ]), trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss'],
                file_name='loss.png', trigger=plot_interval
            ),
            trigger=plot_interval
        )

    # Evaluate VOC mAP right before the LR drop and at the final iteration.
    trainer.extend(
        DetectionVOCEvaluator(
            test_iter, model.faster_rcnn, use_07_metric=True,
            label_names=voc_bbox_label_names),
        trigger=ManualScheduleTrigger(
            [args.step_size, args.iteration], 'iteration'))

    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def main():
    """Train the Blending GAN (encoder-decoder G vs. DCGAN D).

    Splits the dataset folders into train/val by ``--val_ratio``, trains for
    ``--n_epoch`` epochs, periodically snapshots G/D, logs/plots losses and
    writes sample images for both train and val batches.
    """
    parser = argparse.ArgumentParser(description='Train Blending GAN')
    parser.add_argument('--nef', type=int, default=64,
                        help='# of base filters in encoder')
    parser.add_argument('--ngf', type=int, default=64,
                        help='# of base filters in decoder')
    parser.add_argument('--nc', type=int, default=3,
                        help='# of output channels in decoder')
    parser.add_argument('--nBottleneck', type=int, default=4000,
                        help='# of output channels in encoder')
    parser.add_argument('--ndf', type=int, default=64,
                        help='# of base filters in D')
    parser.add_argument('--lr_d', type=float, default=0.0002,
                        help='Learning rate for Critic, default=0.0002')
    parser.add_argument('--lr_g', type=float, default=0.002,
                        help='Learning rate for Generator, default=0.002')
    parser.add_argument('--beta1', type=float, default=0.5,
                        help='Beta for Adam, default=0.5')
    parser.add_argument('--l2_weight', type=float, default=0.999,
                        help='Weight for l2 loss, default=0.999')
    parser.add_argument('--gpu', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--n_epoch', type=int, default=25,
                        help='# of epochs to train for')
    parser.add_argument('--data_root', help='Path to dataset')
    parser.add_argument('--load_size', type=int, default=64,
                        help='Scale image to load_size')
    parser.add_argument(
        '--image_size', type=int, default=64,
        help='The height / width of the input image to network')
    parser.add_argument('--ratio', type=float, default=0.5,
                        help='Ratio for center square size v.s. image_size')
    parser.add_argument('--val_ratio', type=float, default=0.05,
                        help='Ratio for validation set v.s. data set')
    parser.add_argument('--d_iters', type=int, default=5,
                        help='# of D iters per each G iter')
    parser.add_argument('--clamp_lower', type=float, default=-0.01,
                        help='Lower bound for clipping')
    parser.add_argument('--clamp_upper', type=float, default=0.01,
                        help='Upper bound for clipping')
    parser.add_argument('--experiment',
                        default='encoder_decoder_blending_result',
                        help='Where to store samples and models')
    parser.add_argument('--test_folder', default='samples',
                        help='Where to store test results')
    parser.add_argument('--workers', type=int, default=10,
                        help='# of data loading workers')
    parser.add_argument('--batch_size', type=int, default=64,
                        help='Input batch size')
    parser.add_argument('--test_size', type=int, default=64,
                        help='Batch size for testing')
    parser.add_argument('--train_samples', type=int, default=150000,
                        help='# of training examples')
    parser.add_argument('--test_samples', type=int, default=256,
                        help='# of testing examples')
    parser.add_argument('--manual_seed', type=int, default=5,
                        help='Manul seed')
    parser.add_argument('--resume', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--snapshot_interval', type=int, default=1,
                        help='Interval of snapshot (epochs)')
    parser.add_argument('--print_interval', type=int, default=1,
                        help='Interval of printing log to console (iteration)')
    parser.add_argument('--plot_interval', type=int, default=10,
                        help='Interval of plot (iteration)')
    args = parser.parse_args()

    random.seed(args.manual_seed)

    print('Input arguments:')
    for key, value in vars(args).items():
        print('\t{}: {}'.format(key, value))
    print('')

    # Set up G & D
    print('Create & Init models ...')
    print('\tInit G network ...')
    G = EncoderDecoder(args.nef, args.ngf, args.nc, args.nBottleneck,
                       image_size=args.image_size, conv_init=init_conv,
                       bn_init=init_bn)
    print('\tInit D network ...')
    D = DCGAN_D(args.image_size, args.ndf, conv_init=init_conv,
                bn_init=init_bn)
    if args.gpu >= 0:
        print('\tCopy models to gpu {} ...'.format(args.gpu))
        # NOTE(review): get_device() and volatile= (used below) are Chainer v1
        # APIs removed in v2+ — presumably this file targets Chainer v1.
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        G.to_gpu()  # Copy the model to the GPU
        D.to_gpu()
    print('Init models done ...\n')
    # Setup an optimizer
    optimizer_d = make_optimizer(D, args.lr_d, args.beta1)
    optimizer_g = make_optimizer(G, args.lr_g, args.beta1)

    ########################################################################################################################
    # Setup dataset & iterator
    print('Load images from {} ...'.format(args.data_root))
    folders = sorted([
        folder for folder in os.listdir(args.data_root)
        if os.path.isdir(os.path.join(args.data_root, folder))
    ])
    # First val_end folders go to validation, the rest to training.
    val_end = int(args.val_ratio * len(folders))
    print('\t{} folders in total, {} val folders ...'.format(
        len(folders), val_end))
    trainset = BlendingDataset(args.train_samples, folders[val_end:],
                               args.data_root, args.ratio, args.load_size,
                               args.image_size)
    valset = BlendingDataset(args.test_samples, folders[:val_end],
                             args.data_root, args.ratio, args.load_size,
                             args.image_size)
    print('\tTrainset contains {} image files'.format(len(trainset)))
    print('\tValset contains {} image files'.format(len(valset)))
    print('')

    train_iter = chainer.iterators.MultiprocessIterator(
        trainset, args.batch_size, n_processes=args.workers,
        n_prefetch=args.workers)

    ########################################################################################################################
    # Set up a trainer
    updater = EncoderDecoderBlendingUpdater(models=(G, D),
                                            args=args,
                                            iterator=train_iter,
                                            optimizer={
                                                'main': optimizer_g,
                                                'D': optimizer_d
                                            },
                                            device=args.gpu)
    trainer = training.Trainer(updater, (args.n_epoch, 'epoch'),
                               out=args.experiment)

    # Snapshot
    snapshot_interval = (args.snapshot_interval, 'epoch')
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}.npz'),
        trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(G,
                                              'g_epoch_{.updater.epoch}.npz'),
                   trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(D,
                                              'd_epoch_{.updater.epoch}.npz'),
                   trigger=snapshot_interval)

    # Display
    print_interval = (args.print_interval, 'iteration')
    trainer.extend(extensions.LogReport(trigger=print_interval))
    trainer.extend(extensions.PrintReport(
        ['iteration', 'main/loss', 'D/loss', 'main/l2_loss']),
        trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=args.print_interval))

    trainer.extend(extensions.dump_graph('D/loss', out_name='TrainGraph.dot'))

    # Plot
    plot_interval = (args.plot_interval, 'iteration')
    trainer.extend(extensions.PlotReport(['main/loss'], 'iteration',
                                         file_name='loss.png',
                                         trigger=plot_interval),
                   trigger=plot_interval)
    trainer.extend(extensions.PlotReport(['D/loss'], 'iteration',
                                         file_name='d_loss.png',
                                         trigger=plot_interval),
                   trigger=plot_interval)
    trainer.extend(extensions.PlotReport(['main/l2_loss'], 'iteration',
                                         file_name='l2_loss.png',
                                         trigger=plot_interval),
                   trigger=plot_interval)

    # Eval: periodically dump generated samples for fixed train/val batches.
    path = os.path.join(args.experiment, args.test_folder)
    if not os.path.isdir(path):
        os.makedirs(path)
    print('Saving samples to {} ...\n'.format(path))

    train_batch = [trainset[idx][0] for idx in range(args.test_size)]
    train_v = Variable(chainer.dataset.concat_examples(train_batch, args.gpu),
                       volatile='on')
    trainer.extend(sampler(G, path, train_v, 'fake_samples_train_{}.png'),
                   trigger=plot_interval)

    val_batch = [valset[idx][0] for idx in range(args.test_size)]
    val_v = Variable(chainer.dataset.concat_examples(val_batch, args.gpu),
                     volatile='on')
    trainer.extend(sampler(G, path, val_v, 'fake_samples_val_{}.png'),
                   trigger=plot_interval)

    if args.resume:
        # Resume from a snapshot
        print('Resume from {} ... \n'.format(args.resume))
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    print('Training start ...\n')
    trainer.run()
) # set dataset, model and optimizer train, test = chainer.datasets.get_mnist() model = chainer.links.Classifier(MnistMLP()) if os.path.isfile(MODEL_PATH): chainer.serializers.load_npz(MODEL_PATH, model) optimizer = chainer.optimizers.Adam() optimizer.setup(model) # set evaluation model eval_model = model.copy() eval_model.train = False # train and test train_iter = chainer.iterators.SerialIterator(train, 100) test_iter = chainer.iterators.SerialIterator(test, 100,repeat=False, shuffle=False) updater = chainer.training.StandardUpdater(train_iter, optimizer, device=-1) trainer = chainer.training.Trainer(updater, (10, 'epoch'), out=DESKTOP_PATH + "/result") trainer.extend(extensions.Evaluator(test_iter, eval_model, device=-1)) trainer.extend(extensions.LogReport()) trainer.extend(extensions.PrintReport( ['epoch', 'main/loss', 'validation/main/loss', 'main/accuracy', 'validation/main/accuracy'])) trainer.extend(extensions.ProgressBar(update_interval=5)) trainer.extend(extensions.snapshot()) trainer.extend(extensions.snapshot_object(model, 'model_iter_{.updater.iteration}')) trainer.extend(extensions.dump_graph('main/loss')) if os.path.isfile(RESUME_PATH): chainer.serializers.load_npz(RESUME_PATH, trainer) trainer.run()
def main():
    """Train the LEAM word/document classifier with pretrained word vectors.

    Loads word2vec embeddings (frozen during training), converts documents
    to id sequences, splits off a validation set, and trains with Adam,
    keeping the model with the lowest validation loss as ``best_model``.
    """
    # keyboard arguments
    parser = argparse.ArgumentParser(
        description='Chainer example: WordClassification')
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=10,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--unit', '-u', type=int, default=256,
                        help='Number of units')
    parser.add_argument('--window', '-w', type=int, default=20,
                        help='Window Size')
    parser.add_argument('--max-length', type=int, default=200,
                        help='Maximum sentence length')
    args = parser.parse_args()

    # load data (site-specific absolute paths)
    DATA_DIR = '/baobab/kiyomaru/2018-shinjin/jumanpp.midasi'
    PATH_TO_TRAIN = os.path.join(DATA_DIR, 'train.csv')
    PATH_TO_WE = '/share/data/word2vec/2016.08.02/w2v.midasi.256.100K.bin'
    train_x, train_y = load_data(PATH_TO_TRAIN)
    word_vectors = KeyedVectors.load_word2vec_format(PATH_TO_WE, binary=True)
    word2index = {}
    for index, word in enumerate(word_vectors.index2word):
        word2index[word] = index

    # convert document to ids
    train_ids = assign_id_to_document(train_x, word2index, args.max_length)

    # validation: first VALIDATION_SIZE examples are held out.
    train_ids, valid_ids = train_ids[
        VALIDATION_SIZE:], train_ids[:VALIDATION_SIZE]
    train_y, valid_y = train_y[VALIDATION_SIZE:], train_y[:VALIDATION_SIZE]

    # define a model
    train = chainer.datasets.TupleDataset(train_ids, train_y)
    valid = chainer.datasets.TupleDataset(valid_ids, valid_y)
    model = LEAM(n_vocab=len(word2index),
                 n_embed=word_vectors.vector_size,
                 n_units=args.unit,
                 n_class=4,
                 n_window=args.window,
                 W=word_vectors.vectors)
    # Keep the pretrained embeddings frozen.
    model.embed.disable_update()
    model = LeamClassifier(model)
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    valid_iter = chainer.iterators.SerialIterator(valid, args.batchsize,
                                                  repeat=False, shuffle=False)

    updater = training.updaters.StandardUpdater(train_iter, optimizer,
                                                device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))
    trainer.extend(extensions.Evaluator(valid_iter, model, device=args.gpu))
    # Snapshot only when validation loss hits a new minimum.
    trainer.extend(extensions.snapshot_object(model, 'best_model'),
                   trigger=chainer.training.triggers.MinValueTrigger(
                       'validation/main/loss'))

    trainer.run()
def main():
    """Train an SSD detector on DeepFashion bbox data with ChainerMN.

    Rank 0 loads the datasets and scatters them to all workers; training
    runs for 120k iterations with a multi-node optimizer, periodic
    multi-node checkpoints, lr schedule, and VOC-style evaluation.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--labelnum', type=int, default=50)
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    parser.add_argument('--image_label', '-il',
                        help='Path to training image-label list file')
    parser.add_argument('--bbox', help='Path to training bbox list file')
    parser.add_argument('--image_label_test', '-ilt',
                        help='Path to training image-label list file')
    parser.add_argument('--bbox_test', help='Path to training bbox list file')
    parser.add_argument('--image_root', '-TR', default='.',
                        help='Root directory path of image files')
    args = parser.parse_args()

    comm = chainermn.create_communicator('naive')
    if comm.mpi_comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(MPI.COMM_WORLD.Get_size()))
    # NOTE(review): if --model is neither choice, `model` would be unbound,
    # but argparse `choices` already restricts the value to these two.
    if args.model == 'ssd300':
        model = SSD300(n_fg_class=args.labelnum, pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=args.labelnum, pretrained_model='imagenet')
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    # NOTE(review): with multiple MPI processes per node, device selection
    # usually uses comm.intra_rank rather than a single --gpu id — confirm.
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()
    from test_datasets import DeepFashionBboxDataset
    # only rank 0 reads the datasets; scatter_dataset distributes shards
    if comm.rank == 0:
        train = DeepFashionBboxDataset(args.bbox, args.image_label,
                                       args.image_root)
        test = DeepFashionBboxDataset(args.bbox_test, args.image_label_test,
                                      args.image_root)
        train = TransformDataset(
            train, Transform(model.coder, model.insize, model.mean))
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm, shuffle=True)
    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    optimizer.setup(train_chain)
    # biases get doubled gradients; all other params get weight decay
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (120000, 'iteration'), args.out)

    # multi-node checkpointing: resume automatically if a checkpoint exists
    checkpoint_interval = (1000, 'iteration')
    checkpointer = chainermn.create_multi_node_checkpointer(
        name='imagenet-example', comm=comm)
    checkpointer.maybe_load(trainer, optimizer)
    trainer.extend(checkpointer, trigger=checkpoint_interval)

    # lr drops x0.1 at 80k and 100k iterations (standard SSD schedule)
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger([80000, 100000],
                                                          'iteration'))
    # NOTE(review): label_names uses VOC class names although the dataset
    # is DeepFashion with args.labelnum classes — verify this is intended.
    evaluator = DetectionVOCEvaluator(test_iter, model, use_07_metric=True,
                                      label_names=voc_bbox_label_names)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=(10000, 'iteration'))

    # reporting/snapshot extensions only on the master process
    if comm.rank == 0:
        log_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
            'main/loss/conf', 'validation/main/map'
        ]), trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))
        trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration'))
        # final model dump at the very last iteration
        trainer.extend(extensions.snapshot_object(
            model, 'model_iter_{.updater.iteration}'),
            trigger=(120000, 'iteration'))
    if args.resume:
        serializers.load_npz(args.resume, trainer)
    trainer.run()
def main():
    """Entry point: train a DRLSR super-resolution model.

    Trains on General-100 and evaluates on Set-14, saving snapshots,
    loss/PSNR plots, and the argument dump under the results directory.
    """
    # argument parsing
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.01,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=10,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--iter_parallel', action='store_true', default=False,
                        help='filter(kernel) sizes')
    args = parser.parse_args()
    # NOTE(review): --resume is parsed but never used below — confirm whether
    # snapshot resuming was intended here.

    # echo the training parameters
    print("-=Learning Parameter=-")
    print("# Max Epochs: {}".format(args.epoch))
    print("# Batch Size: {}".format(args.batchsize))
    print("# Learning Rate: {}".format(args.learnrate))
    print('# Train Dataet: General 100')
    print('# Test Dataet: Set 14')
    if args.iter_parallel:
        print("# Data Iters that loads in Parallel")
    print("\n")

    # output directory; also dump the parsed arguments for reproducibility
    outdir = path.join(ROOT_PATH, 'results/deconv_res_Test')
    if not path.exists(outdir):
        os.makedirs(outdir)
    with open(path.join(outdir, 'arg_param.txt'), 'w') as f:
        for k, v in args.__dict__.items():
            f.write('{}:{}\n'.format(k, v))

    print('# loading dataet(General100, Set14) ...')
    train, test = load_dataset()

    # prepare model (wrapped in GenEvaluator, which reports loss/PSNR)
    model = GenEvaluator(DRLSR())
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # setup optimizer: momentum SGD with weight decay and gradient clipping
    optimizer = chainer.optimizers.MomentumSGD(lr=args.learnrate, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001))
    optimizer.add_hook(chainer.optimizer.GradientClipping(0.1))

    # setup iterators (optionally multiprocess-backed)
    if args.iter_parallel:
        train_iter = chainer.iterators.MultiprocessIterator(
            train, args.batchsize, n_processes=8)
        test_iter = chainer.iterators.MultiprocessIterator(
            test, args.batchsize, repeat=False, shuffle=False,
            n_processes=8)
    else:
        train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
        test_iter = chainer.iterators.SerialIterator(
            test, args.batchsize, repeat=False, shuffle=False)

    # setup trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=outdir)
    # eval test data
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))
    # dump loss graph
    trainer.extend(extensions.dump_graph('main/loss'))
    # lr shift: x0.1 every 100 epochs
    trainer.extend(extensions.ExponentialShift("lr", 0.1),
                   trigger=(100, 'epoch'))
    # save snapshot (trainer state and model) every 10 epochs
    trainer.extend(extensions.snapshot(), trigger=(10, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_snapshot_{.updater.epoch}'), trigger=(10, 'epoch'))
    # log report
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.observe_lr(), trigger=(1, 'epoch'))
    # plot loss graph
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              'epoch', file_name='loss.png'))
    # plot PSNR graph
    trainer.extend(
        extensions.PlotReport(['main/PSNR', 'validation/main/PSNR'],
                              'epoch', file_name='PSNR.png'))
    # print info
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/PSNR',
            'validation/main/PSNR', 'lr', 'elapsed_time'
        ]))
    # print progbar
    trainer.extend(extensions.ProgressBar())

    trainer.run()
num_iterations=args.test_iterations, converter=get_concat_and_pad_examples( args.blank_label)), (args.test_interval, 'iteration')) epoch_validation_iterator = copy.copy(validation_iterator) epoch_validation_iterator._repeat = False # 该评估器接受所有的验证图像,对每个图像进行评估,并报告所有验证图像的验证度量。 epoch_evaluator = (chainer.training.extensions.Evaluator( epoch_validation_iterator, model, device=updater._devices[0], converter=get_concat_and_pad_examples(args.blank_label), ), (1, 'epoch')) model_snapshotter = (extensions.snapshot_object( net, 'model_{.updater.iteration}.npz'), (args.snapshot_interval, 'iteration')) # bbox plotter test if not args.test_image: test_image = validation_dataset.get_example(0)[0] else: test_image = train_dataset.load_image(args.test_image) bbox_plotter = (TextRecBBOXPlotter( test_image, os.path.join(log_dir, 'boxes'), target_shape, metrics, send_bboxes=args.send_bboxes, upstream_port=args.port,
def train(args):
    """Train an end-to-end ASR model with the Chainer backend.

    Builds the E2E model described by ``args.model_module``, sets up
    single- or multi-GPU updaters, attention plotting, best-model
    snapshots, AdaDelta epsilon decay, and reporting, then runs the
    Chainer trainer.

    :param Namespace args: The program arguments
    """
    # display chainer version
    logging.info('chainer version = ' + chainer.__version__)

    set_deterministic_chainer(args)

    # check cuda and cudnn availability
    if not chainer.cuda.available:
        logging.warning('cuda is not available')
    if not chainer.cuda.cudnn_enabled:
        logging.warning('cudnn is not available')

    # get input and output dimension info from the first validation utterance
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']
    utts = list(valid_json.keys())
    idim = int(valid_json[utts[0]]['input'][0]['shape'][1])
    odim = int(valid_json[utts[0]]['output'][0]['shape'][1])
    logging.info('#input dims : ' + str(idim))
    logging.info('#output dims: ' + str(odim))

    # check attention type
    if args.atype not in ['noatt', 'dot', 'location']:
        raise NotImplementedError(
            'chainer supports only noatt, dot, and location attention.')

    # specify attention, CTC, hybrid mode
    if args.mtlalpha == 1.0:
        mtl_mode = 'ctc'
        logging.info('Pure CTC mode')
    elif args.mtlalpha == 0.0:
        mtl_mode = 'att'
        logging.info('Pure attention mode')
    else:
        mtl_mode = 'mtl'
        logging.info('Multitask learning mode')

    # specify model architecture
    logging.info('import model module: ' + args.model_module)
    from importlib import import_module
    model_module = import_module(args.model_module)
    model = model_module.E2E(idim, odim, args, flag_return=False)

    # write model config
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    model_conf = args.outdir + '/model.json'
    with open(model_conf, 'wb') as f:
        logging.info('writing a model config file to ' + model_conf)
        f.write(json.dumps((idim, odim, vars(args)),
                           indent=4, sort_keys=True).encode('utf_8'))
    for key in sorted(vars(args).keys()):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    # Set gpu
    ngpu = args.ngpu
    if ngpu == 1:
        gpu_id = 0
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(gpu_id).use()
        model.to_gpu()  # Copy the model to the GPU
        logging.info('single gpu calculation.')
    elif ngpu > 1:
        gpu_id = 0
        devices = {'main': gpu_id}
        for gid in six.moves.xrange(1, ngpu):
            devices['sub_%d' % gid] = gid
        logging.info('multi gpu calculation (#gpus = %d).' % ngpu)
        logging.info('batch size is automatically increased (%d -> %d)' % (
            args.batch_size, args.batch_size * args.ngpu))
    else:
        gpu_id = -1
        logging.info('cpu calculation')

    # Setup an optimizer
    if args.opt == 'adadelta':
        optimizer = chainer.optimizers.AdaDelta(eps=args.eps)
    elif args.opt == 'adam':
        optimizer = chainer.optimizers.Adam()
    elif args.opt == 'noam':
        # alpha is overridden per-iteration by VaswaniRule below
        optimizer = chainer.optimizers.Adam(alpha=0, beta1=0.9, beta2=0.98,
                                            eps=1e-9)
    else:
        # FIX: previously an unknown --opt fell through and caused a
        # NameError on optimizer.setup; fail with a clear message instead.
        raise NotImplementedError('unknown optimizer: ' + args.opt)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.grad_clip))

    # read json data
    with open(args.train_json, 'rb') as f:
        train_json = json.load(f)['utts']
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']

    # set up training iterator and updater
    converter = CustomConverter(subsampling_factor=model.subsample[0],
                                preprocess_conf=args.preprocess_conf)
    use_sortagrad = args.sortagrad == -1 or args.sortagrad > 0
    accum_grad = args.accum_grad
    if ngpu <= 1:
        # make minibatch list (variable length)
        train = make_batchset(train_json, args.batch_size,
                              args.maxlen_in, args.maxlen_out,
                              args.minibatches,
                              shortest_first=use_sortagrad)
        # hack to make batchsize argument as 1
        # actual batchsize is included in a list
        if args.n_iter_processes > 0:
            train_iters = [ToggleableShufflingMultiprocessIterator(
                TransformDataset(train, converter.transform),
                batch_size=1, n_processes=args.n_iter_processes,
                n_prefetch=8, maxtasksperchild=20,
                shuffle=not use_sortagrad)]
        else:
            train_iters = [ToggleableShufflingSerialIterator(
                TransformDataset(train, converter.transform),
                batch_size=1, shuffle=not use_sortagrad)]

        # set up updater
        updater = CustomUpdater(
            train_iters[0], optimizer, converter=converter, device=gpu_id,
            accum_grad=accum_grad)
    else:
        # set up minibatches: split utterances round-robin across GPUs
        train_subsets = []
        for gid in six.moves.xrange(ngpu):
            # make subset
            train_json_subset = {k: v for i, (k, v)
                                 in enumerate(train_json.items())
                                 if i % ngpu == gid}
            # make minibatch list (variable length)
            train_subsets += [make_batchset(train_json_subset,
                                            args.batch_size,
                                            args.maxlen_in, args.maxlen_out,
                                            args.minibatches)]

        # each subset must have same length for MultiprocessParallelUpdater
        maxlen = max([len(train_subset) for train_subset in train_subsets])
        for train_subset in train_subsets:
            if maxlen != len(train_subset):
                # pad shorter subsets by repeating their first batches
                for i in six.moves.xrange(maxlen - len(train_subset)):
                    train_subset += [train_subset[i]]

        # hack to make batchsize argument as 1
        # actual batchsize is included in a list
        if args.n_iter_processes > 0:
            train_iters = [ToggleableShufflingMultiprocessIterator(
                TransformDataset(train_subsets[gid], converter.transform),
                batch_size=1, n_processes=args.n_iter_processes,
                n_prefetch=8, maxtasksperchild=20,
                shuffle=not use_sortagrad)
                for gid in six.moves.xrange(ngpu)]
        else:
            train_iters = [ToggleableShufflingSerialIterator(
                TransformDataset(train_subsets[gid], converter.transform),
                batch_size=1, shuffle=not use_sortagrad)
                for gid in six.moves.xrange(ngpu)]

        # set up updater
        updater = CustomParallelUpdater(
            train_iters, optimizer, converter=converter, devices=devices)

    # Set up a trainer
    trainer = training.Trainer(
        updater, (args.epochs, 'epoch'), out=args.outdir)

    if use_sortagrad:
        # re-enable shuffling after the sortagrad epochs are over
        trainer.extend(ShufflingEnabler(train_iters),
                       trigger=(args.sortagrad if args.sortagrad != -1
                                else args.epochs, 'epoch'))
    if args.opt == 'noam':
        trainer.extend(model_module.VaswaniRule(
            'alpha', d=args.adim,
            warmup_steps=args.transformer_warmup_steps,
            scale=args.transformer_lr), trigger=(1, 'iteration'))
    # Resume from a snapshot
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # set up validation iterator
    valid = make_batchset(valid_json, args.batch_size,
                          args.maxlen_in, args.maxlen_out, args.minibatches)
    if args.n_iter_processes > 0:
        valid_iter = chainer.iterators.MultiprocessIterator(
            TransformDataset(valid, converter.transform),
            batch_size=1, repeat=False, shuffle=False,
            n_processes=args.n_iter_processes,
            n_prefetch=8, maxtasksperchild=20)
    else:
        valid_iter = chainer.iterators.SerialIterator(
            TransformDataset(valid, converter.transform),
            batch_size=1, repeat=False, shuffle=False)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(
        valid_iter, model, converter=converter, device=gpu_id))

    # Save attention weight each epoch (only meaningful when attention
    # is actually used, i.e. not pure CTC)
    if args.num_save_attention > 0 and args.mtlalpha != 1.0:
        data = sorted(list(valid_json.items())[:args.num_save_attention],
                      key=lambda x: int(x[1]['input'][0]['shape'][1]),
                      reverse=True)
        if hasattr(model, "module"):
            att_vis_fn = model.module.calculate_all_attentions
        else:
            att_vis_fn = model.calculate_all_attentions
        try:
            # prefer a model-specific plotter when the module defines one
            PlotAttentionReport = model_module.PlotAttentionReport
            logging.info('Using custom PlotAttentionReport')
        except AttributeError:
            from espnet.asr.asr_utils import PlotAttentionReport
        att_reporter = PlotAttentionReport(
            att_vis_fn, data, args.outdir + "/att_ws",
            converter=converter, device=gpu_id)
        trainer.extend(att_reporter, trigger=(1, 'epoch'))
    else:
        att_reporter = None

    # Take a snapshot for each specified epoch
    trainer.extend(extensions.snapshot(
        filename='snapshot.ep.{.updater.epoch}'), trigger=(1, 'epoch'))

    # Make a plot for training and validation values
    trainer.extend(extensions.PlotReport(
        ['main/loss', 'validation/main/loss',
         'main/loss_ctc', 'validation/main/loss_ctc',
         'main/loss_att', 'validation/main/loss_att'],
        'epoch', file_name='loss.png'))
    trainer.extend(extensions.PlotReport(
        ['main/acc', 'validation/main/acc'],
        'epoch', file_name='acc.png'))

    # Save best models
    trainer.extend(extensions.snapshot_object(model, 'model.loss.best'),
                   trigger=training.triggers.MinValueTrigger(
                       'validation/main/loss'))
    # FIX: was `mtl_mode is not 'ctc'` — identity comparison with a string
    # literal only works by CPython interning and raises a SyntaxWarning
    # on Python >= 3.8; use equality.
    if mtl_mode != 'ctc':
        trainer.extend(extensions.snapshot_object(model, 'model.acc.best'),
                       trigger=training.triggers.MaxValueTrigger(
                           'validation/main/acc'))

    # epsilon decay in the optimizer: when validation stops improving,
    # restore the best model so far and shrink AdaDelta's eps
    if args.opt == 'adadelta':
        if args.criterion == 'acc' and mtl_mode != 'ctc':  # FIX: was `is not`
            trainer.extend(
                restore_snapshot(model, args.outdir + '/model.acc.best'),
                trigger=CompareValueTrigger(
                    'validation/main/acc',
                    lambda best_value, current_value:
                        best_value > current_value))
            trainer.extend(
                adadelta_eps_decay(args.eps_decay),
                trigger=CompareValueTrigger(
                    'validation/main/acc',
                    lambda best_value, current_value:
                        best_value > current_value))
        elif args.criterion == 'loss':
            trainer.extend(
                restore_snapshot(model, args.outdir + '/model.loss.best'),
                trigger=CompareValueTrigger(
                    'validation/main/loss',
                    lambda best_value, current_value:
                        best_value < current_value))
            trainer.extend(
                adadelta_eps_decay(args.eps_decay),
                trigger=CompareValueTrigger(
                    'validation/main/loss',
                    lambda best_value, current_value:
                        best_value < current_value))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport(
        trigger=(REPORT_INTERVAL, 'iteration')))
    report_keys = ['epoch', 'iteration', 'main/loss', 'main/loss_ctc',
                   'main/loss_att', 'validation/main/loss',
                   'validation/main/loss_ctc', 'validation/main/loss_att',
                   'main/acc', 'validation/main/acc', 'elapsed_time']
    if args.opt == 'adadelta':
        trainer.extend(extensions.observe_value(
            'eps',
            lambda trainer: trainer.updater.get_optimizer('main').eps),
            trigger=(REPORT_INTERVAL, 'iteration'))
        report_keys.append('eps')
    trainer.extend(extensions.PrintReport(report_keys),
                   trigger=(REPORT_INTERVAL, 'iteration'))

    trainer.extend(extensions.ProgressBar(update_interval=REPORT_INTERVAL))

    set_early_stop(trainer, args)
    if args.tensorboard_dir is not None and args.tensorboard_dir != "":
        writer = SummaryWriter(args.tensorboard_dir)
        trainer.extend(TensorboardLogger(writer, att_reporter))

    # Run the training
    trainer.run()
    check_early_stop(trainer, args.epochs)
def train(args, train_data, test_data, evaluator_type):
    """Train a Mask R-CNN model, optionally across multiple nodes.

    :param args: argparse Namespace; must carry the keys listed in
        ``required_args`` plus model/optimization options read below.
        Several derived values (seed, out, lr, step_size, ...) are written
        back onto ``args`` so they get dumped to params.yaml.
    :param train_data: training dataset (wrapped with MaskRCNNTransform).
    :param test_data: evaluation dataset.
    :param evaluator_type: 'voc' or 'coco' — selects the mAP evaluator.
    """
    # fail early if the caller's namespace is missing required settings
    required_args = [
        'dataset',
        'class_names',
        'logs_dir',
        'min_size',
        'max_size',
        'anchor_scales',
    ]
    for arg_key in required_args:
        if not hasattr(args, arg_key):
            raise ValueError(
                'args must contain required key: {}'.format(arg_key))

    assert evaluator_type in ['voc', 'coco'], \
        'Unsupported evaluator_type: {}'.format(evaluator_type)

    if args.multi_node:
        import chainermn
        comm = chainermn.create_communicator('hierarchical')
        device = comm.intra_rank  # one GPU per process within a node

        args.n_node = comm.inter_size
        args.n_gpu = comm.size
        chainer.cuda.get_device_from_id(device).use()
    else:
        if args.gpu is None:
            print(
                'Option --gpu is required without --multi-node.',
                file=sys.stderr,
            )
            sys.exit(1)
        args.n_node = 1
        args.n_gpu = 1
        chainer.cuda.get_device_from_id(args.gpu).use()
        device = args.gpu

    args.seed = 0
    now = datetime.datetime.now()
    args.timestamp = now.isoformat()
    args.out = osp.join(args.logs_dir, now.strftime('%Y%m%d_%H%M%S'))
    # effective batch size scales with the number of GPUs
    args.batch_size = args.batch_size_per_gpu * args.n_gpu

    # lr: 0.00125 * 8 = 0.01 in original
    args.lr = 0.00125 * args.batch_size
    args.weight_decay = 0.0001

    # lr / 10 at 120k iteration with
    # 160k iteration * 16 batchsize in original
    args.step_size = [
        (120e3 / 180e3) * args.max_epoch,
        (160e3 / 180e3) * args.max_epoch,
    ]

    random.seed(args.seed)
    np.random.seed(args.seed)

    # select the RoI feature pooling implementation
    if args.pooling_func == 'align':
        pooling_func = cmr.functions.roi_align_2d
    elif args.pooling_func == 'pooling':
        pooling_func = cmr.functions.roi_pooling_2d
    elif args.pooling_func == 'resize':
        pooling_func = cmr.functions.crop_and_resize
    else:
        raise ValueError('Unsupported pooling_func: {}'.format(
            args.pooling_func))

    # initializer for the mask head
    if args.initializer == 'normal':
        mask_initialW = chainer.initializers.Normal(0.01)
    elif args.initializer == 'he_normal':
        mask_initialW = chainer.initializers.HeNormal(fan_option='fan_out')
    else:
        raise ValueError('Unsupported initializer: {}'.format(
            args.initializer))

    if args.model == 'vgg16':
        mask_rcnn = cmr.models.MaskRCNNVGG16(
            n_fg_class=len(args.class_names),
            pretrained_model='imagenet',
            pooling_func=pooling_func,
            anchor_scales=args.anchor_scales,
            roi_size=args.roi_size,
            min_size=args.min_size,
            max_size=args.max_size,
            mask_initialW=mask_initialW,
        )
    elif args.model in ['resnet50', 'resnet101']:
        n_layers = int(args.model.lstrip('resnet'))
        mask_rcnn = cmr.models.MaskRCNNResNet(
            n_layers=n_layers,
            n_fg_class=len(args.class_names),
            pooling_func=pooling_func,
            anchor_scales=args.anchor_scales,
            roi_size=args.roi_size,
            min_size=args.min_size,
            max_size=args.max_size,
            mask_initialW=mask_initialW,
        )
    else:
        raise ValueError('Unsupported model: {}'.format(args.model))
    model = cmr.models.MaskRCNNTrainChain(mask_rcnn)
    if args.multi_node or args.gpu >= 0:
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    if args.multi_node:
        optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=args.weight_decay))

    if args.model in ['resnet50', 'resnet101']:
        # ResNetExtractor.freeze_at is not enough to freeze params
        # since WeightDecay updates the param little by little.
        mask_rcnn.extractor.conv1.disable_update()
        mask_rcnn.extractor.bn1.disable_update()
        mask_rcnn.extractor.res2.disable_update()
        for link in mask_rcnn.links():
            if isinstance(link, cmr.links.AffineChannel2D):
                link.disable_update()

    train_data = chainer.datasets.TransformDataset(
        train_data,
        cmr.datasets.MaskRCNNTransform(mask_rcnn),
    )
    test_data = chainer.datasets.TransformDataset(
        test_data,
        cmr.datasets.MaskRCNNTransform(mask_rcnn, train=False),
    )
    if args.multi_node:
        # only rank 0 keeps the data; scatter shards it to every worker
        if comm.rank != 0:
            train_data = None
            test_data = None
        train_data = chainermn.scatter_dataset(train_data, comm, shuffle=True)
        test_data = chainermn.scatter_dataset(test_data, comm)

    # FIXME: MultiProcessIterator sometimes hangs
    train_iter = chainer.iterators.SerialIterator(
        train_data, batch_size=args.batch_size_per_gpu,
    )
    test_iter = chainer.iterators.SerialIterator(
        test_data, batch_size=args.batch_size_per_gpu,
        repeat=False, shuffle=False,
    )

    converter = functools.partial(
        cmr.datasets.concat_examples,
        padding=0,
        # img, bboxes, labels, masks, scales
        indices_concat=[0, 2, 3, 4],  # img, _, labels, masks, scales
        indices_to_device=[0, 1],  # img, bbox
    )
    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, device=device,
        converter=converter,
    )

    trainer = training.Trainer(
        updater, (args.max_epoch, 'epoch'), out=args.out,
    )

    # step-wise lr decay at the epochs computed into args.step_size
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1),
        trigger=training.triggers.ManualScheduleTrigger(
            args.step_size, 'epoch',
        ),
    )

    eval_interval = 1, 'epoch'
    log_interval = 20, 'iteration'
    plot_interval = 0.1, 'epoch'
    print_interval = 20, 'iteration'

    if evaluator_type == 'voc':
        evaluator = cmr.extensions.InstanceSegmentationVOCEvaluator(
            test_iter,
            model.mask_rcnn,
            device=device,
            use_07_metric=True,
            label_names=args.class_names,
        )
    elif evaluator_type == 'coco':
        evaluator = cmr.extensions.InstanceSegmentationCOCOEvaluator(
            test_iter,
            model.mask_rcnn,
            device=device,
            label_names=args.class_names,
        )
    else:
        raise ValueError(
            'Unsupported evaluator_type: {}'.format(evaluator_type))
    if args.multi_node:
        evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=eval_interval)

    # reporting/snapshot extensions only on the master process
    if not args.multi_node or comm.rank == 0:
        # Save snapshot of the best model by validation mAP.
        trainer.extend(
            extensions.snapshot_object(model.mask_rcnn, 'snapshot_model.npz'),
            trigger=training.triggers.MaxValueTrigger(
                'validation/main/map',
                eval_interval,
            ),
        )

        # Dump params.yaml.
        args.git_hash = cmr.utils.git_hash()
        args.hostname = socket.gethostname()
        trainer.extend(fcn.extensions.ParamsReport(args.__dict__))

        # Visualization.
        trainer.extend(
            cmr.extensions.InstanceSegmentationVisReport(
                test_iter,
                model.mask_rcnn,
                label_names=args.class_names,
            ),
            trigger=eval_interval,
        )

        # Logging.
        trainer.extend(
            chainer.training.extensions.observe_lr(),
            trigger=log_interval,
        )
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(
            extensions.PrintReport([
                'iteration',
                'epoch',
                'elapsed_time',
                'lr',
                'main/loss',
                'main/roi_loc_loss',
                'main/roi_cls_loss',
                'main/roi_mask_loss',
                'main/rpn_loc_loss',
                'main/rpn_cls_loss',
                'validation/main/map',
            ],
            ),
            trigger=print_interval,
        )
        trainer.extend(extensions.ProgressBar(update_interval=10))

        # Plot.
        assert extensions.PlotReport.available()
        trainer.extend(
            extensions.PlotReport(
                [
                    'main/loss',
                    'main/roi_loc_loss',
                    'main/roi_cls_loss',
                    'main/roi_mask_loss',
                    'main/rpn_loc_loss',
                    'main/rpn_cls_loss',
                ],
                file_name='loss.png',
                trigger=plot_interval,
            ),
            trigger=plot_interval,
        )
        trainer.extend(
            extensions.PlotReport(
                ['validation/main/map'],
                file_name='accuracy.png',
                trigger=plot_interval,
            ),
            trigger=eval_interval,
        )

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def train(args):
    """Train a Tacotron2 TTS model (PyTorch model driven by a Chainer
    Trainer).

    Dimensions are read from the validation json; note input/output are
    swapped relative to ASR (text in, spectrogram out). Saves per-epoch
    snapshots, the best model by validation loss, attention plots, and
    loss curves.

    :param Namespace args: The program arguments
    """
    # seed setting
    torch.manual_seed(args.seed)

    # use determinisitic computation or not
    if args.debugmode < 1:
        torch.backends.cudnn.deterministic = False
        logging.info('torch cudnn deterministic is disabled')
    else:
        torch.backends.cudnn.deterministic = True

    # check cuda availability
    if not torch.cuda.is_available():
        logging.warning('cuda is not available')

    # get input and output dimension info
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']
    utts = list(valid_json.keys())

    # reverse input and output dimension (TTS: text -> feature)
    idim = int(valid_json[utts[0]]['output'][0]['shape'][1])
    odim = int(valid_json[utts[0]]['input'][0]['shape'][1])
    if args.use_cbhg:
        args.spc_dim = int(valid_json[utts[0]]['input'][1]['shape'][1])
    if args.use_speaker_embedding:
        args.spk_embed_dim = int(valid_json[utts[0]]['input'][1]['shape'][0])
    else:
        args.spk_embed_dim = None
    logging.info('#input dims : ' + str(idim))
    logging.info('#output dims: ' + str(odim))

    # write model config
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    model_conf = args.outdir + '/model.json'
    with open(model_conf, 'wb') as f:
        # NOTE(review): message is missing a space before the path —
        # runtime string left untouched here.
        logging.info('writing a model config file to' + model_conf)
        f.write(
            json.dumps((idim, odim, vars(args)),
                       indent=4, sort_keys=True).encode('utf_8'))
    for key in sorted(vars(args).keys()):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    # specify model architecture
    tacotron2 = Tacotron2(idim, odim, args)
    logging.info(tacotron2)

    # check the use of multi-gpu
    if args.ngpu > 1:
        tacotron2 = torch.nn.DataParallel(
            tacotron2, device_ids=list(range(args.ngpu)))
        logging.info('batch size is automatically increased (%d -> %d)' % (
            args.batch_size, args.batch_size * args.ngpu))
        args.batch_size *= args.ngpu

    # set torch device
    device = torch.device("cuda" if args.ngpu > 0 else "cpu")
    tacotron2 = tacotron2.to(device)

    # define loss (wraps the model and reports metrics via its reporter)
    model = Tacotron2Loss(tacotron2, args.use_masking, args.bce_pos_weight)
    reporter = model.reporter

    # Setup an optimizer
    optimizer = torch.optim.Adam(
        model.parameters(), args.lr, eps=args.eps,
        weight_decay=args.weight_decay)

    # FIXME: TOO DIRTY HACK
    # makes the torch optimizer quack like a chainer optimizer so the
    # chainer Trainer can serialize it
    setattr(optimizer, 'target', reporter)
    setattr(optimizer, 'serialize', lambda s: reporter.serialize(s))

    # Setup a converter
    converter = CustomConverter(True, args.use_speaker_embedding,
                                args.use_cbhg)

    # read json data
    with open(args.train_json, 'rb') as f:
        train_json = json.load(f)['utts']
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']

    # make minibatch list (variable length)
    train_batchset = make_batchset(
        train_json, args.batch_size, args.maxlen_in, args.maxlen_out,
        args.minibatches, args.batch_sort_key,
        min_batch_size=args.ngpu if args.ngpu > 1 else 1)
    valid_batchset = make_batchset(
        valid_json, args.batch_size, args.maxlen_in, args.maxlen_out,
        args.minibatches, args.batch_sort_key,
        min_batch_size=args.ngpu if args.ngpu > 1 else 1)

    # hack to make batchsize argument as 1
    # actual batchsize is included in a list
    if args.n_iter_processes > 0:
        train_iter = chainer.iterators.MultiprocessIterator(
            TransformDataset(train_batchset, converter.transform),
            batch_size=1, n_processes=args.n_iter_processes,
            n_prefetch=8, maxtasksperchild=20)
        valid_iter = chainer.iterators.MultiprocessIterator(
            TransformDataset(valid_batchset, converter.transform),
            batch_size=1, repeat=False, shuffle=False,
            n_processes=args.n_iter_processes,
            n_prefetch=8, maxtasksperchild=20)
    else:
        train_iter = chainer.iterators.SerialIterator(
            TransformDataset(train_batchset, converter.transform),
            batch_size=1)
        valid_iter = chainer.iterators.SerialIterator(
            TransformDataset(valid_batchset, converter.transform),
            batch_size=1, repeat=False, shuffle=False)

    # Set up a trainer
    updater = CustomUpdater(model, args.grad_clip, train_iter, optimizer,
                            converter, device)
    trainer = training.Trainer(updater, (args.epochs, 'epoch'),
                               out=args.outdir)

    # Resume from a snapshot
    if args.resume:
        logging.info('resumed from %s' % args.resume)
        torch_resume(args.resume, trainer)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(
        CustomEvaluator(model, valid_iter, reporter, converter, device))

    # Save snapshot for each epoch
    trainer.extend(torch_snapshot(), trigger=(1, 'epoch'))

    # Save best models (by validation loss; saved with torch_save since the
    # underlying model is a torch module)
    trainer.extend(
        extensions.snapshot_object(tacotron2, 'model.loss.best',
                                   savefun=torch_save),
        trigger=training.triggers.MinValueTrigger('validation/main/loss'))

    # Save attention figure for each epoch
    if args.num_save_attention > 0:
        data = sorted(list(valid_json.items())[:args.num_save_attention],
                      key=lambda x: int(x[1]['input'][0]['shape'][1]),
                      reverse=True)
        if hasattr(tacotron2, "module"):
            # DataParallel wraps the real model in .module
            att_vis_fn = tacotron2.module.calculate_all_attentions
        else:
            att_vis_fn = tacotron2.calculate_all_attentions
        trainer.extend(PlotAttentionReport(
            att_vis_fn, data, args.outdir + '/att_ws',
            converter=CustomConverter(
                False, args.use_speaker_embedding),
            device=device, reverse=True), trigger=(1, 'epoch'))

    # Make a plot for training and validation values
    plot_keys = [
        'main/loss', 'validation/main/loss', 'main/l1_loss',
        'validation/main/l1_loss', 'main/mse_loss',
        'validation/main/mse_loss', 'main/bce_loss',
        'validation/main/bce_loss'
    ]
    trainer.extend(
        extensions.PlotReport(['main/l1_loss', 'validation/main/l1_loss'],
                              'epoch', file_name='l1_loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/mse_loss', 'validation/main/mse_loss'],
                              'epoch', file_name='mse_loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/bce_loss', 'validation/main/bce_loss'],
                              'epoch', file_name='bce_loss.png'))
    if args.use_cbhg:
        # extra CBHG post-net losses
        plot_keys += [
            'main/cbhg_l1_loss', 'validation/main/cbhg_l1_loss',
            'main/cbhg_mse_loss', 'validation/main/cbhg_mse_loss'
        ]
        trainer.extend(
            extensions.PlotReport(
                ['main/cbhg_l1_loss', 'validation/main/cbhg_l1_loss'],
                'epoch', file_name='cbhg_l1_loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/cbhg_mse_loss', 'validation/main/cbhg_mse_loss'],
                'epoch', file_name='cbhg_mse_loss.png'))
    trainer.extend(
        extensions.PlotReport(plot_keys, 'epoch', file_name='loss.png'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport(
        trigger=(REPORT_INTERVAL, 'iteration')))
    report_keys = plot_keys[:]
    report_keys[0:0] = ['epoch', 'iteration', 'elapsed_time']
    trainer.extend(extensions.PrintReport(report_keys),
                   trigger=(REPORT_INTERVAL, 'iteration'))
    trainer.extend(extensions.ProgressBar(update_interval=REPORT_INTERVAL))

    # Run the training
    trainer.run()
def main(args):
    """Train an image-classification convnet and return evaluation results.

    Builds the model named by ``args.arch``, optionally loads finetuning /
    initial weights, trains with a chainer Trainer, saves the final model
    under a timestamped output directory, and returns the evaluator results
    dict (with ``'outputdir'`` added).
    """
    # Initialize the model to train
    model = models.archs[args.arch]()
    if args.finetune and hasattr(model, 'finetuned_model_path'):
        # Load pretrained parameters, skipping layers listed in args.ignore.
        utils.finetuning.load_param(model.finetuned_model_path, model, args.ignore)
        #model.finetune = True
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    # Output directory: <out>/<arch>/<YYYYmmdd-HHMM>_bs<batchsize>;
    # in test mode with an init model, reuse that model's directory instead.
    nowt = datetime.datetime.today()
    outputdir = args.out + '/' + args.arch + '/' + nowt.strftime("%Y%m%d-%H%M") + '_bs' + str(args.batchsize)
    if args.test and args.initmodel is not None:
        outputdir = os.path.dirname(args.initmodel)

    # Load the datasets and mean file.  A model-provided mean value takes
    # precedence over the mean image file given on the command line.
    mean = None
    if hasattr(model, 'mean_value'):
        mean = makeMeanImage(model.mean_value)
    else:
        mean = np.load(args.mean)
    assert mean is not None

    train = ppds.PreprocessedDataset(args.train, args.root, mean, model.insize)
    val = ppds.PreprocessedDataset(args.val, args.root, mean, model.insize, False)
    # These iterators load the images with subprocesses running in parallel to
    # the training/validation.
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, shuffle=False, n_processes=args.loaderjob)
    #val_iter = chainer.iterators.MultiprocessIterator(
    #    val, args.val_batchsize, repeat=False, shuffle=False, n_processes=args.loaderjob)
    val_iter = chainer.iterators.SerialIterator(
        val, args.val_batchsize, repeat=False, shuffle=False)

    # Set up an optimizer chosen by name from the module-level `optimizers`
    # mapping; lr/momentum are applied only when the optimizer has them.
    optimizer = optimizers[args.opt]()
    #if args.opt == 'momentumsgd':
    if hasattr(optimizer, 'lr'):
        optimizer.lr = args.baselr
    if hasattr(optimizer, 'momentum'):
        optimizer.momentum = args.momentum
    optimizer.setup(model)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), outputdir)

    # Short intervals in --test mode so a smoke run finishes quickly.
    #val_interval = (10 if args.test else int(len(train) / args.batchsize)), 'iteration'
    val_interval = (10, 'iteration') if args.test else (1, 'epoch')
    snapshot_interval = (10, 'iteration') if args.test else (4, 'epoch')
    log_interval = (10 if args.test else 200), 'iteration'

    # Copy the chain with shared parameters to flip 'train' flag only in test
    eval_model = model.copy()
    eval_model.train = False
    if not args.test:
        val_evaluator = extensions.Evaluator(val_iter, eval_model, device=args.gpu)
    else:
        # EvaluatorPlus presumably also records a confusion matrix
        # (confmat is read below) — TODO confirm against utils.
        val_evaluator = utils.EvaluatorPlus(val_iter, eval_model, device=args.gpu)
        if 'googlenet' in args.arch:
            val_evaluator.lastname = 'validation/main/loss3'
    trainer.extend(val_evaluator, trigger=val_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=(500, 'iteration'))
    # Be careful to pass the interval directly to LogReport
    # (it determines when to emit log rather than when to read observations)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy',
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    if args.opt == 'momentumsgd':
        # Exponential lr decay once per epoch, only for momentum SGD.
        trainer.extend(extensions.ExponentialShift('lr', args.gamma),
                       trigger=(1, 'epoch'))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)
    if not args.test:
        # Save the initial (untrained) parameters for later comparison.
        chainer.serializers.save_npz(outputdir + '/model0', model)

    trainer.run()

    # Persist the trained model and the arguments used for this run.
    chainer.serializers.save_npz(outputdir + '/model', model)
    with open(outputdir + '/args.txt', 'w') as o:
        print(args, file=o)

    # Run the evaluator once more to obtain the final results dict.
    results = val_evaluator(trainer)
    results['outputdir'] = outputdir

    if args.test:
        # Dump the confusion matrix as CSV and EPS next to the init model.
        print(val_evaluator.confmat)
        categories = utils.io.load_categories(args.categories)
        confmat_csv_name = args.initmodel + '.csv'
        confmat_fig_name = args.initmodel + '.eps'
        utils.io.save_confmat_csv(confmat_csv_name, val_evaluator.confmat, categories)
        utils.io.save_confmat_fig(confmat_fig_name, val_evaluator.confmat, categories,
                                  mode="rate", saveFormat="eps")
    return results
def main():
    """Train the line-drawing colorization U-Net.

    Parses command-line options, loads the pretrained 128px helper network,
    runs the GAN trainer, and saves the final model/optimizer under
    ``args.out``.

    Fixes over the previous revision:
    * the resume snapshot is now loaded *before* ``trainer.run()`` —
      loading it afterwards had no effect on training;
    * the final ``save_npz`` calls used an undefined name ``out_dir``
      (NameError at the end of every run); they now use ``args.out``.
    """
    parser = argparse.ArgumentParser(
        description='chainer line drawing colorization')
    parser.add_argument('--batchsize', '-b', type=int, default=4,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--dataset', '-i', default='./images/',
                        help='Directory of image files.')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--seed', type=int, default=0, help='Random seed')
    parser.add_argument('--snapshot_interval', type=int, default=10000,
                        help='Interval of snapshot')
    parser.add_argument('--display_interval', type=int, default=100,
                        help='Interval of displaying log to console')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    root = args.dataset
    #model = "./model_paint"
    cnn = unet.UNET()
    #serializers.load_npz("result/model_iter_10000", cnn)
    cnn_128 = unet.UNET()
    # The 128px network is used as a fixed, pretrained helper.
    serializers.load_npz("models/model_cnn_128_dfl2_9", cnn_128)

    dataset = Image2ImageDatasetX2(
        "dat/images_color_train.dat", root + "linex2/", root + "colorx2/",
        train=True)
    # dataset.set_img_dict(img_dict)
    train_iter = chainer.iterators.SerialIterator(dataset, args.batchsize)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        cnn.to_gpu()  # Copy the model to the GPU
        cnn_128.to_gpu()  # Copy the model to the GPU

    # Setup optimizer parameters.
    opt = optimizers.Adam(alpha=0.0001)
    opt.setup(cnn)
    opt.add_hook(chainer.optimizer.WeightDecay(1e-5), 'hook_cnn')

    # Set up a trainer
    updater = ganUpdater(
        models=(cnn, cnn_128),
        iterator={'main': train_iter},
        optimizer={'cnn': opt},
        device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    snapshot_interval = (args.snapshot_interval, 'iteration')
    snapshot_interval2 = (args.snapshot_interval * 2, 'iteration')
    trainer.extend(extensions.dump_graph('cnn/loss'))
    trainer.extend(extensions.snapshot(), trigger=snapshot_interval2)
    trainer.extend(extensions.snapshot_object(
        cnn, 'cnn_x2_iter_{.updater.iteration}'), trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(opt, 'optimizer_'),
                   trigger=snapshot_interval)
    trainer.extend(extensions.LogReport(trigger=(10, 'iteration'), ))
    trainer.extend(
        extensions.PrintReport(['epoch', 'cnn/loss', 'cnn/loss_rec']))
    trainer.extend(extensions.ProgressBar(update_interval=20))

    if args.resume:
        # Resume from a snapshot (must happen before trainer.run()).
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()

    # Save the trained model and optimizer state.
    chainer.serializers.save_npz(os.path.join(args.out, 'model_final'), cnn)
    chainer.serializers.save_npz(os.path.join(args.out, 'optimizer_final'), opt)
def main():
    """Fine-tune / evaluate a BERT classifier on a GLUE-style task.

    Driven entirely by module-level ``FLAGS``: optionally trains
    (``do_train``), evaluates on the dev set (``do_eval``), and/or prints a
    few pooled-output arrays for debugging (``do_print_test``).

    Fix over the previous revision: the ``do_print_test`` branch passed
    ``test_iter`` (defined only inside the ``do_eval`` branch) to the
    Evaluator, raising NameError whenever ``do_print_test`` ran without
    ``do_eval``; it now uses the ``short_test_iter`` built in that branch.
    """
    processors = {
        "cola": ColaProcessor,
        "mnli": MnliProcessor,
        "mrpc": MrpcProcessor,
    }

    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_print_test:
        raise ValueError("At least one of `do_train` or `do_eval` "
                         "or `do_print_test` must be True.")

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    if not os.path.isdir(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    task_name = FLAGS.task_name.lower()
    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))
    processor = processors[task_name]()
    label_list = processor.get_labels()

    tokenizer = tokenization.FullTokenizer(
        vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)

    train_examples = None
    num_train_steps = None
    num_warmup_steps = None
    # TODO: use special Adam from "optimization.py"
    if FLAGS.do_train:
        train_examples = processor.get_train_examples(FLAGS.data_dir)
        num_train_steps = int(
            len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    bert = modeling.BertModel(config=bert_config)
    model = modeling.BertClassifier(bert, num_labels=len(label_list))
    # Load pretrained weights, but keep the freshly-initialized output layer.
    chainer.serializers.load_npz(
        FLAGS.init_checkpoint, model, ignore_names=['output/W', 'output/b'])

    if FLAGS.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(FLAGS.gpu).use()
        model.to_gpu()

    if FLAGS.do_train:
        # Adam with weight decay only for 2D matrices
        optimizer = optimization.WeightDecayForMatrixAdam(
            alpha=1.,  # ignore alpha. instead, use eta as actual lr
            eps=1e-6, weight_decay_rate=0.01)
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer.GradientClipping(1.))

        train_iter = chainer.iterators.SerialIterator(
            train_examples, FLAGS.train_batch_size)
        converter = Converter(label_list, FLAGS.max_seq_length, tokenizer)
        updater = training.updaters.StandardUpdater(
            train_iter, optimizer,
            converter=converter,
            device=FLAGS.gpu)
        trainer = training.Trainer(
            updater, (num_train_steps, 'iteration'), out=FLAGS.output_dir)

        # learning rate (eta) scheduling in Adam
        lr_decay_init = FLAGS.learning_rate * \
            (num_train_steps - num_warmup_steps) / num_train_steps
        trainer.extend(extensions.LinearShift(  # decay
            'eta', (lr_decay_init, 0.), (num_warmup_steps, num_train_steps)))
        trainer.extend(extensions.WarmupShift(  # warmup
            'eta', 0., num_warmup_steps, FLAGS.learning_rate))
        trainer.extend(extensions.observe_value(
            'eta', lambda trainer: trainer.updater.get_optimizer('main').eta),
            trigger=(50, 'iteration'))  # logging

        trainer.extend(extensions.snapshot_object(
            model, 'model_snapshot_iter_{.updater.iteration}.npz'),
            trigger=(num_train_steps, 'iteration'))
        trainer.extend(extensions.LogReport(trigger=(50, 'iteration')))
        trainer.extend(extensions.PrintReport(
            ['iteration', 'main/loss', 'main/accuracy', 'elapsed_time']))
        trainer.extend(extensions.ProgressBar(update_interval=10))

        trainer.run()

    if FLAGS.do_eval:
        eval_examples = processor.get_dev_examples(FLAGS.data_dir)
        test_iter = chainer.iterators.SerialIterator(
            eval_examples, FLAGS.train_batch_size * 2,
            repeat=False, shuffle=False)
        converter = Converter(label_list, FLAGS.max_seq_length, tokenizer)
        evaluator = extensions.Evaluator(
            test_iter, model, converter=converter, device=FLAGS.gpu)
        results = evaluator()
        print(results)

    # if you wanna see some output arrays for debugging
    if FLAGS.do_print_test:
        short_eval_examples = processor.get_dev_examples(FLAGS.data_dir)[:3]
        short_eval_examples = short_eval_examples[:FLAGS.eval_batch_size]
        short_test_iter = chainer.iterators.SerialIterator(
            short_eval_examples, FLAGS.eval_batch_size,
            repeat=False, shuffle=False)
        converter = Converter(label_list, FLAGS.max_seq_length, tokenizer)
        # Fixed: use short_test_iter here; `test_iter` only exists when
        # --do_eval also ran.
        evaluator = extensions.Evaluator(
            short_test_iter, model, converter=converter, device=FLAGS.gpu)
        with chainer.using_config('train', False):
            with chainer.no_backprop_mode():
                data = short_test_iter.__next__()
                out = model.bert.get_pooled_output(
                    *converter(data, FLAGS.gpu)[:-1])
                print(out)
                print(out.shape)
                print(converter(data, -1))
def train():
    """Train the gated-CNN (GCNN) segment classifier.

    Parses options, builds train/validation iterators from a 90/10 random
    split of SegmentDataset, and runs a chainer Trainer with snapshot,
    logging and plotting extensions.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--train', action='store_true')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--epoch', '-e', type=int, default=50,
                        help='number of epochs to learn')
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='learning minibatch size')
    parser.add_argument('--optimizer', type=str, default='Adam',
                        help='optimizer to use for backprop')
    parser.add_argument(
        '--feature', '-f', type=str, default='mfcc',
        #choices=['mfcc', 'mfcc_delta', 'fbank', 'fbank_delta', 'plp', 'plp_delta'],
        help='feature type')
    parser.add_argument('--out', '-o', type=str, default='GCNN/model/test',
                        help='path to the output directory')
    parser.add_argument('--seed', type=int, default=0, help='random seed')
    args = parser.parse_args()

    if not os.path.exists(args.out):
        os.makedirs(args.out)

    # Set seed (numpy always; cupy only when a GPU is used)
    np.random.seed(args.seed)
    if args.gpu >= 0:
        cuda.get_device_from_id(args.gpu).use()
        cuda.cupy.random.seed(args.seed)
    print(args)

    # 6 output classes — TODO confirm this matches the dataset's label set.
    model = net.GCNN(6)
    if args.gpu >= 0:
        model.to_gpu()
    xp = np if args.gpu < 0 else cuda.cupy

    # NOTE: the --optimizer flag is not consulted here; Adam is always used.
    opt_model = chainer.optimizers.Adam()
    opt_model.setup(model)

    print('Preparing data...')
    dataset = SegmentDataset('train', args.feature, 512, normalized=True)
    # 90% train / 10% validation, deterministic for a given seed.
    train_dat, val_dat = chainer.datasets.split_dataset_random(
        dataset, int(len(dataset) * 0.9), seed=args.seed)
    train_iter = chainer.iterators.SerialIterator(train_dat, args.batchsize,
                                                  shuffle=True)
    val_iter = chainer.iterators.SerialIterator(val_dat, args.batchsize,
                                                repeat=False, shuffle=False)

    updater = GCNNUpdater(iterators={'main': train_iter},
                          models=(model),
                          optimizers={'model': opt_model},
                          device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    evaluator = GCNNEvaluator(iterators={
        'main': val_iter,
    }, models={'model': model}, device=args.gpu)
    trainer.extend(evaluator)

    # Snapshots only at the very end of training (trigger == total epochs).
    snapshot_interval = (args.epoch, 'epoch')
    display_interval = (1, 'epoch')
    # snapshot
    trainer.extend(
        extensions.snapshot(filename='{.updater.epoch}_epoch_snapshot.npz'),
        trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(model,
                                              '{.updater.epoch}.model'),
                   trigger=snapshot_interval)
    # Report
    log_keys = [
        'epoch', 'model/loss', 'model/acc', 'val/model/loss', 'val/model/acc'
    ]
    trainer.extend(
        extensions.LogReport(keys=log_keys, trigger=display_interval))
    trainer.extend(extensions.PrintReport(log_keys), trigger=display_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(
        extensions.PlotReport(['model/loss', 'val/model/loss'],
                              'epoch', file_name='loss.png'))
    trainer.extend(
        extensions.PlotReport(['model/acc', 'val/model/acc'],
                              'epoch', file_name='acc.png'))
    trainer.run()
def main():
    """Train an ILSVRC2012 convnet on multiple GPUs.

    Uses MultiprocessParallelUpdater with one iterator per GPU (the training
    set is split randomly across devices) and standard trainer extensions.
    """
    archs = {
        'alex': alex.Alex,
        'alex_fp16': alex.AlexFp16,
        'googlenet': googlenet.GoogLeNet,
        'googlenetbn': googlenetbn.GoogLeNetBN,
        'googlenetbn_fp16': googlenetbn.GoogLeNetBNFp16,
        'nin': nin.NIN
    }

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to training image-label list file')
    parser.add_argument('val', help='Path to validation image-label list file')
    parser.add_argument('--arch', '-a', choices=archs.keys(), default='nin',
                        help='Convnet architecture')
    parser.add_argument('--batchsize', '-B', type=int, default=32,
                        help='Learning minibatch size')
    parser.add_argument('--epoch', '-E', type=int, default=10,
                        help='Number of epochs to train')
    parser.add_argument('--gpus', '-g', type=int, nargs="*",
                        default=[0, 1, 2, 3])
    parser.add_argument('--initmodel',
                        help='Initialize the model from given file')
    parser.add_argument('--loaderjob', '-j', type=int,
                        help='Number of parallel data loading processes')
    parser.add_argument('--mean', '-m', default='mean.npy',
                        help='Mean file (computed by compute_mean.py)')
    parser.add_argument('--resume', '-r', default='',
                        help='Initialize the trainer from given file')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--root', '-R', default='.',
                        help='Root directory path of image files')
    parser.add_argument('--val_batchsize', '-b', type=int, default=250,
                        help='Validation minibatch size')
    parser.add_argument('--test', action='store_true')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    # Initialize the model to train
    model = archs[args.arch]()
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    # Load the datasets and mean file
    mean = np.load(args.mean)
    train = train_imagenet.PreprocessedDataset(
        args.train, args.root, mean, model.insize)
    val = train_imagenet.PreprocessedDataset(
        args.val, args.root, mean, model.insize, False)
    # These iterators load the images with subprocesses running in parallel to
    # the training/validation.
    devices = tuple(args.gpus)

    # One training iterator per device over a disjoint random shard.
    train_iters = [
        chainer.iterators.MultiprocessIterator(i,
                                               args.batchsize,
                                               n_processes=args.loaderjob)
        for i in chainer.datasets.split_dataset_n_random(train, len(devices))]
    val_iter = chainer.iterators.MultiprocessIterator(
        val, args.val_batchsize, repeat=False, n_processes=args.loaderjob)

    # Set up an optimizer
    optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer.setup(model)

    # Set up a trainer
    updater = updaters.MultiprocessParallelUpdater(train_iters, optimizer,
                                                   devices=devices)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    # Much shorter intervals in --test mode for a quick smoke run.
    if args.test:
        val_interval = 5, 'epoch'
        log_interval = 1, 'epoch'
    else:
        val_interval = 100000, 'iteration'
        log_interval = 1000, 'iteration'

    # Validation runs on the first GPU in the list.
    trainer.extend(train_imagenet.TestModeEvaluator(val_iter, model,
                                                    device=args.gpus[0]),
                   trigger=val_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    # Be careful to pass the interval directly to LogReport
    # (it determines when to emit log rather than when to read observations)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'lr'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=2))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
optimizer, device=gpu_id) result_dir = '../results/multi4_transfer_soft_{}_20_100_200_{}_{}_depth{}_valid{}'.format( mode, n_topic, iteration, sum(depth) * 2 + 1, args.valid) trainer = training.Trainer(updater, (epoch_size * max_epoch, 'iteration'), out=result_dir) from chainer.training import extensions trainer.extend(extensions.LogReport(trigger=(epoch_size, 'iteration'))) trainer.extend( extensions.snapshot(filename='snapshot_iteration-{.updater.iteration}'), trigger=(epoch_size, 'iteration')) trainer.extend(extensions.snapshot_object( model.predictor, filename='model_iteration-{.updater.iteration}'), trigger=(epoch_size, 'iteration')) trainer.extend(extensions.Evaluator(test_iter, model, device=gpu_id), trigger=(epoch_size, 'iteration')) trainer.extend(extensions.observe_lr(), trigger=(epoch_size, 'iteration')) trainer.extend(extensions.PrintReport([ 'iteration', 'lr', 'main/accuracy', 'validation/main/accuracy', 'elapsed_time' ]), trigger=(epoch_size, 'iteration')) trainer.extend(extensions.dump_graph('main/loss')) trainer.extend(extensions.ExponentialShift('lr', 0.5), trigger=(epoch_size * 3, 'iteration')) trainer.extend(extensions.ProgressBar(update_interval=30)) print('running')
loss_config=config.loss, predictor=predictor, discriminator=discriminator, device=config.train.gpu, iterator=train_iter, optimizer=opts, converter=converter, ) # trainer trigger_log = (config.train.log_iteration, 'iteration') trigger_snapshot = (config.train.snapshot_iteration, 'iteration') trainer = training.Trainer(updater, out=arguments.output) ext = extensions.Evaluator(test_iter, models, converter, device=config.train.gpu, eval_func=updater.forward) trainer.extend(ext, name='test', trigger=trigger_log) ext = extensions.Evaluator(train_eval_iter, models, converter, device=config.train.gpu, eval_func=updater.forward) trainer.extend(ext, name='train', trigger=trigger_log) trainer.extend(extensions.dump_graph('predictor/loss')) ext = extensions.snapshot_object(predictor, filename='predictor_{.updater.iteration}.npz') trainer.extend(ext, trigger=trigger_snapshot) trainer.extend(extensions.LogReport(trigger=trigger_log)) trainer.extend(extensions.PrintReport(['predictor/loss'])) save_args(arguments, arguments.output) trainer.run()
def main(config):
    """Distributed (ChainerMN) k-fold training driver.

    For each stratified fold: saves the fold CSVs (rank 0 only), builds the
    model from a chainercv2 backbone, scatters the datasets across workers,
    selects optimizer and loss functions from ``opts``, and runs one trainer
    per fold. Optionally augments the training set with pseudo-labeled test
    rows before splitting.
    """
    opts = config()
    comm = chainermn.create_communicator(opts.communicator)
    # One GPU per process, indexed by the intra-node rank.
    device = comm.intra_rank
    backborn_cfg = opts.backborn_cfg
    # sample(frac=1) shuffles the rows.
    df = pd.read_csv(opts.path_data + opts.train_df).sample(frac=1)

    ################### pseudo labeling #########################
    if opts.pseudo_labeling_path is not None:
        test_df = pd.read_csv(opts.path_data + opts.test_df)
        labels = np.load(opts.pseudo_labeling_path, allow_pickle=False)
        # Duplicated, presumably to cover both imaging sites per sample —
        # TODO confirm against how the label file was produced.
        labels = np.concatenate((labels, labels))
        count = 0
        valid_array = []
        valid_sirna = []
        # Keep only confident predictions (max probability above threshold).
        for i, label in enumerate(labels):
            if label.max() > 0.0013:
                count = count + 1
                valid_array.append(i)
                valid_sirna.append(label.argmax())
        print(count)
        pseudo_df = test_df.iloc[valid_array, :]
        pseudo_df["sirna"] = valid_sirna
        pseudo_df = pseudo_df
        df = pd.concat([df, pseudo_df]).sample(frac=1)
    ################### pseudo labeling #########################

    for i, (train_df, valid_df) in enumerate(
            stratified_groups_kfold(df, target=opts.fold_target,
                                    n_splits=opts.fold)):
        # Only rank 0 writes the per-fold CSVs to avoid concurrent writes.
        if comm.rank == 0:
            train_df.to_csv(
                opts.path_data + 'train' + '_fold' + str(i) + '.csv',
                columns=[
                    'id_code', 'experiment', 'plate', 'well', 'sirna',
                    'filename', 'cell', 'site'
                ])
            valid_df.to_csv(
                opts.path_data + 'valid' + '_fold' + str(i) + '.csv',
                columns=[
                    'id_code', 'experiment', 'plate', 'well', 'sirna',
                    'filename', 'cell', 'site'
                ])
            print("Save a csvfile of fold_" + str(i))

        dataset = opts.dataset
        train_dataset = dataset(train_df, opts.path_data)
        val_dataset = dataset(valid_df, opts.path_data)

        # Backbone from chainercv2's model zoo, truncated at the given layer.
        backborn = chcv2_get_model(
            backborn_cfg['name'],
            pretrained=backborn_cfg['pretrain'],
            in_size=opts.input_shape)[backborn_cfg['layer']]
        model = opts.model(backborn=backborn).copy(mode='init')
        if device >= 0:
            chainer.cuda.get_device(device).use()
            model.to_gpu()
        mean = opts.mean

        train_data = TransformDataset(train_dataset, opts.train_transform)
        # NOTE: 'valid_trainsform' is the attribute name as declared in the
        # config object (typo preserved).
        val_data = TransformDataset(val_dataset, opts.valid_trainsform)

        # scatter_dataset distributes from rank 0; other ranks pass None.
        if comm.rank == 0:
            train_indices = train_data
            val_indices = val_data
        else:
            train_indices = None
            val_indices = None
        train_data = chainermn.scatter_dataset(train_indices, comm,
                                               shuffle=True)
        val_data = chainermn.scatter_dataset(val_indices, comm, shuffle=False)

        train_iter = chainer.iterators.MultiprocessIterator(
            train_data, opts.batchsize, shuffle=True,
            n_processes=opts.loaderjob)
        val_iter = chainer.iterators.MultiprocessIterator(
            val_data, opts.batchsize, repeat=False, shuffle=False,
            n_processes=opts.loaderjob)
        print('finished loading dataset')

        if device >= 0:
            chainer.cuda.get_device(device).use()
            model.to_gpu()

        # Optimizer choice: SGD variants by name, otherwise AdaBound-style Adam.
        if opts.optimizer == "CorrectedMomentumSGD":
            optimizer = chainermn.create_multi_node_optimizer(
                CorrectedMomentumSGD(lr=opts.lr), comm)
        elif opts.optimizer == "NesterovAG":
            optimizer = chainermn.create_multi_node_optimizer(
                NesterovAG(lr=opts.lr), comm)
        else:
            optimizer = chainermn.create_multi_node_optimizer(
                Adam(alpha=opts.alpha, weight_decay_rate=opts.weight_decay,
                     adabound=True, final_lr=0.5), comm)
        optimizer.setup(model)

        # Weight decay on everything except batch-norm beta/gamma.
        if opts.optimizer == "CorrectedMomentumSGD":
            for param in model.params():
                if param.name not in ('beta', 'gamma'):
                    param.update_rule.add_hook(WeightDecay(opts.weight_decay))

        # Classification-head loss selection.
        if opts.fc_lossfun == 'softmax_cross_entropy':
            fc_lossfun = F.softmax_cross_entropy
        elif opts.fc_lossfun == 'focal_loss':
            if opts.ls:
                focal_loss = FocalLoss(label_smoothing=True)
            else:
                focal_loss = FocalLoss()
            fc_lossfun = focal_loss.loss
        elif opts.fc_lossfun == 'auto_focal_loss':
            if opts.ls:
                focal_loss = AutoFocalLoss(label_smoothing=True)
            else:
                focal_loss = AutoFocalLoss()
            fc_lossfun = focal_loss.loss
        elif opts.fc_lossfun == 'auto_focal_loss_bce':
            if opts.ls:
                focal_loss = AutoFocalLossBCE(label_smoothing=True)
            else:
                # NOTE(review): falls back to AutoFocalLoss (not the BCE
                # variant) when label smoothing is off — confirm intended.
                focal_loss = AutoFocalLoss()
            fc_lossfun = focal_loss.loss

        # Metric-learning loss selection.
        if opts.metric_lossfun == 'arcface':
            arcface = ArcFace()
            metric_lossfun = arcface.loss
        elif opts.metric_lossfun == 'adacos':
            adacos = AdaCos()
            metric_lossfun = adacos.loss

        updater = opts.updater(train_iter, optimizer, model, device=device,
                               max_epoch=opts.max_epoch,
                               fix_sche=opts.fix_sche,
                               metric_lossfun=metric_lossfun,
                               fc_lossfun=fc_lossfun,
                               metric_w=opts.metric_w, fc_w=opts.fc_w)
        evaluator = chainermn.create_multi_node_evaluator(
            opts.evaluator(val_iter, model, device=device,
                           max_epoch=opts.max_epoch,
                           fix_sche=opts.fix_sche,
                           metric_lossfun=metric_lossfun,
                           fc_lossfun=fc_lossfun,
                           metric_w=opts.metric_w, fc_w=opts.fc_w), comm)
        trainer = training.Trainer(updater, (opts.max_epoch, 'epoch'),
                                   out=opts.out + '_fold' + str(i))

        # lr (SGD variants) or alpha (Adam) decay at the scheduled epochs.
        if opts.optimizer == "CorrectedMomentumSGD":
            trainer.extend(extensions.ExponentialShift('lr', opts.shift_lr),
                           trigger=ManualScheduleTrigger(
                               opts.lr_points, 'epoch'))
        elif opts.optimizer == "NesterovAG":
            trainer.extend(extensions.ExponentialShift('lr', opts.shift_lr),
                           trigger=ManualScheduleTrigger(
                               opts.lr_points, 'epoch'))
        else:
            trainer.extend(extensions.ExponentialShift('alpha', opts.shift_lr),
                           trigger=ManualScheduleTrigger(
                               opts.lr_points, 'epoch'))

        # Evaluate ten times over the whole run.
        trainer.extend(evaluator, trigger=(int(opts.max_epoch / 10), 'epoch'))
        # trainer.extend(evaluator, trigger=(int(1), 'epoch'))
        log_interval = 0.1, 'epoch'
        print_interval = 0.1, 'epoch'

        # Reporting/snapshot extensions only on rank 0.
        if comm.rank == 0:
            trainer.extend(chainer.training.extensions.observe_lr(),
                           trigger=log_interval)
            trainer.extend(extensions.snapshot_object(
                model, 'snapshot_model' + '_{.updater.epoch}.npz'),
                trigger=(opts.max_epoch / 10, 'epoch'))
            trainer.extend(extensions.snapshot_object(
                model, 'snapshot_model_f1max.npz'),
                trigger=chainer.training.triggers.MaxValueTrigger(
                    'validation/main/accuracy',
                    trigger=(opts.max_epoch / 10, 'epoch')))
            trainer.extend(extensions.LogReport(trigger=log_interval))
            trainer.extend(extensions.PrintReport([
                'iteration', 'epoch', 'elapsed_time', 'lr', 'main/loss',
                'main/face_loss', 'main/ce_loss', 'main/accuracy',
                'validation/main/loss', 'validation/main/face_loss',
                'validation/main/ce_loss', 'validation/main/accuracy'
            ]), trigger=print_interval)
            trainer.extend(extensions.ProgressBar(update_interval=10))
        trainer.run()
device=args.gpu) trainer = chainer.training.Trainer(updater, (args.epoch, 'epoch'), out=args.out) trainer.extend(evaluator, trigger=(1, 'epoch')) @make_shift('lr') def lr_schedule(trainer): max_lr = args.lr min_lr = 0 epoch = trainer.updater.epoch_detail progress_ratio = epoch / args.epoch rate = 0.5 * (math.cos(math.pi * progress_ratio) + 1) return min_lr + max_lr * rate trainer.extend(lr_schedule) trainer.extend(extensions.LogReport(), trigger=(1, 'epoch')) trainer.extend(extensions.observe_lr(), trigger=(1, 'epoch')) trainer.extend(extensions.PrintReport([ 'epoch', 'lr', 'main/loss', 'validation/main/loss', 'main/accuracy', 'validation/main/accuracy', 'elapsed_time' ]), trigger=(1, 'epoch')) trainer.extend(extensions.ProgressBar(update_interval=50)) trainer.extend(extensions.snapshot_object( model, 'model_iter_{.updater.epoch_detail}'), trigger=(50, 'epoch')) trainer.run()
def main():
    """Train a Faster R-CNN (VGG16) defect detector on GPU 0.

    Fix over the previous revision: ``bbox_label_names`` was written as
    ``('loop')``, which is just the string ``'loop'`` (a one-element tuple
    needs a trailing comma), so DetectionVOCEvaluator would have seen the
    individual characters as label names. The snapshot trigger now uses
    integer division so the interval is an int.
    """
    bbox_label_names = ('loop',)

    n_itrs = 70000   # total training iterations
    n_step = 50000   # iteration at which lr is decayed by 10x
    np.random.seed(0)
    train_data = MultiDefectDetectionDataset(split='train')
    test_data = MultiDefectDetectionDataset(split='test')

    # Small min_size so tiny region proposals are kept.
    proposal_params = {'min_size': 8}
    faster_rcnn = FasterRCNNVGG16(n_fg_class=2,
                                  pretrained_model='imagenet',
                                  ratios=[0.5, 1, 2],
                                  anchor_scales=[0.5, 1, 4, 8, 16],
                                  min_size=1024, max_size=1024,
                                  proposal_creator_params=proposal_params)
    faster_rcnn.use_preset('evaluate')
    model = FasterRCNNTrainChain(faster_rcnn)
    chainer.cuda.get_device_from_id(0).use()
    model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=1e-3, momentum=0.8)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    train_data = TransformDataset(train_data, Transform(faster_rcnn))

    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, batch_size=1, n_processes=None, shared_mem=100000000)
    test_iter = chainer.iterators.SerialIterator(
        test_data, batch_size=1, repeat=False, shuffle=False)

    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, device=0)
    trainer = training.Trainer(updater, (n_itrs, 'iteration'), out='result')

    # Model-only snapshots, 5 over the course of training.
    trainer.extend(extensions.snapshot_object(
        model.faster_rcnn, 'snapshot_model_{.updater.iteration}.npz'),
        trigger=(n_itrs // 5, 'iteration'))
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=(n_step, 'iteration'))

    log_interval = 50, 'iteration'
    plot_interval = 100, 'iteration'
    print_interval = 20, 'iteration'

    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=log_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'iteration', 'epoch', 'elapsed_time', 'lr',
        'main/loss', 'main/roi_loc_loss', 'main/roi_cls_loss',
        'main/rpn_loc_loss', 'main/rpn_cls_loss', 'validation/main/map',
    ]), trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=5))

    if extensions.PlotReport.available():
        trainer.extend(extensions.PlotReport(['main/loss'],
                                             file_name='loss.png',
                                             trigger=plot_interval),
                       trigger=plot_interval)

    # mAP evaluation at a fixed schedule of iterations.
    trainer.extend(
        DetectionVOCEvaluator(test_iter, model.faster_rcnn,
                              use_07_metric=True,
                              label_names=bbox_label_names),
        trigger=ManualScheduleTrigger(
            [100, 500, 1000, 5000, 10000, 20000, 40000, 60000,
             n_step, n_itrs], 'iteration'))

    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def main():
    """Train a text classifier (CNN/RNN/BoW encoder) on a chosen dataset.

    Saves the best model (by validation accuracy), the vocabulary, and the
    run settings under ``args.out``.
    """
    # Recorded so the saved settings identify when this run happened.
    current_datetime = '{}'.format(datetime.datetime.today())
    parser = argparse.ArgumentParser(
        description='Chainer example: Text Classification')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=30,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--unit', '-u', type=int, default=300,
                        help='Number of units')
    parser.add_argument('--layer', '-l', type=int, default=1,
                        help='Number of layers of RNN or MLP following CNN')
    parser.add_argument('--dropout', '-d', type=float, default=0.4,
                        help='Dropout rate')
    parser.add_argument('--dataset', '-data', default='imdb.binary',
                        choices=['dbpedia', 'imdb.binary', 'imdb.fine',
                                 'TREC', 'stsa.binary', 'stsa.fine',
                                 'custrev', 'mpqa', 'rt-polarity', 'subj'],
                        help='Name of dataset.')
    parser.add_argument('--model', '-model', default='cnn',
                        choices=['cnn', 'rnn', 'bow'],
                        help='Name of encoder model type.')
    parser.add_argument('--char-based', action='store_true')
    parser.add_argument('--test', dest='test', action='store_true')
    parser.set_defaults(test=False)
    args = parser.parse_args()
    print(json.dumps(args.__dict__, indent=2))

    # Load a dataset (every --dataset choice is covered by one branch)
    if args.dataset == 'dbpedia':
        train, test, vocab = text_datasets.get_dbpedia(
            char_based=args.char_based)
    elif args.dataset.startswith('imdb.'):
        train, test, vocab = text_datasets.get_imdb(
            fine_grained=args.dataset.endswith('.fine'),
            char_based=args.char_based)
    elif args.dataset in ['TREC', 'stsa.binary', 'stsa.fine',
                          'custrev', 'mpqa', 'rt-polarity', 'subj']:
        train, test, vocab = text_datasets.get_other_text_dataset(
            args.dataset, char_based=args.char_based)

    # --test mode: tiny subsets for a quick smoke run.
    if args.test:
        train = train[:100]
        test = test[:100]

    print('# train data: {}'.format(len(train)))
    print('# test data: {}'.format(len(test)))
    print('# vocab: {}'.format(len(vocab)))
    # Class count inferred from the labels actually present in train.
    n_class = len(set([int(d[1]) for d in train]))
    print('# class: {}'.format(n_class))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Setup a model
    if args.model == 'rnn':
        Encoder = nets.RNNEncoder
    elif args.model == 'cnn':
        Encoder = nets.CNNEncoder
    elif args.model == 'bow':
        Encoder = nets.BOWMLPEncoder
    encoder = Encoder(n_layers=args.layer, n_vocab=len(vocab),
                      n_units=args.unit, dropout=args.dropout)
    model = nets.TextClassifier(encoder, n_class)
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(1e-4))

    # Set up a trainer
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer,
        converter=convert_seq, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(
        test_iter, model,
        converter=convert_seq, device=args.gpu))

    # Take a best snapshot (only when validation accuracy hits a new max)
    record_trigger = training.triggers.MaxValueTrigger(
        'validation/main/accuracy', (1, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        model, 'best_model.npz'),
        trigger=record_trigger)

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    # Save vocabulary and model's setting
    if not os.path.isdir(args.out):
        os.mkdir(args.out)
    vocab_path = os.path.join(args.out, 'vocab.json')
    with open(vocab_path, 'w') as f:
        json.dump(vocab, f)
    model_path = os.path.join(args.out, 'best_model.npz')
    model_setup = args.__dict__
    model_setup['vocab_path'] = vocab_path
    model_setup['model_path'] = model_path
    model_setup['n_class'] = n_class
    model_setup['datetime'] = current_datetime
    with open(os.path.join(args.out, 'args.json'), 'w') as f:
        json.dump(args.__dict__, f)

    # Run the training
    trainer.run()
def test_trigger(self):
    """The trigger given to snapshot_object must be stored on the extension."""
    dummy_target = mock.MagicMock()
    ext = extensions.snapshot_object(
        dummy_target, 'myfile.dat', trigger=self.trigger)
    self.assertEqual(ext.trigger, self.trigger)
def train(args):
    """Run the end-to-end training loop.

    Builds the model from the dimensions found in the validation JSON,
    sets up a PyTorch optimizer wrapped for Chainer's Trainer, and runs
    training with evaluation/snapshot/eps-decay extensions.
    """
    # seed setting for reproducibility
    torch.manual_seed(args.seed)

    # debug mode setting
    # 0 would be fastest, but 1 seems to be reasonable
    # by considering reproducibility
    # remove type check
    if args.debugmode < 2:
        chainer.config.type_check = False
        logging.info('torch type check is disabled')
    # use deterministic computation or not
    if args.debugmode < 1:
        torch.backends.cudnn.deterministic = False
        logging.info('torch cudnn deterministic is disabled')
    else:
        torch.backends.cudnn.deterministic = True

    # check cuda availability (training continues on CPU with a warning)
    if not torch.cuda.is_available():
        logging.warning('cuda is not available')

    # get input and output dimension info from the first utterance of the
    # validation set; assumes all utterances share these dims — TODO confirm
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']
    utts = list(valid_json.keys())
    idim = int(valid_json[utts[0]]['input'][0]['shape'][1])
    odim = int(valid_json[utts[0]]['output'][0]['shape'][1])
    logging.info('#input dims : ' + str(idim))
    logging.info('#output dims: ' + str(odim))
    # second output stream carries the adversarial target dimension
    odim_adv = int(valid_json[utts[0]]['output'][1]['shape'][1])
    logging.info('#output dims adversarial: ' + str(odim_adv))

    # specify model architecture (E2E is trained against the adversarial dim)
    e2e = E2E(idim, odim_adv, args)
    model = Loss(e2e)

    # write model config so decoding can rebuild the same architecture
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    model_conf = args.outdir + '/model.json'
    with open(model_conf, 'wb') as f:
        logging.info('writing a model config file to ' + model_conf)
        f.write(
            json.dumps((idim, odim, odim_adv, vars(args)),
                       indent=4, sort_keys=True).encode('utf_8'))
    for key in sorted(vars(args).keys()):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    # Log total number of parameters
    pytorch_total_params = sum(p.numel() for p in e2e.parameters())
    logging.info("Total parameters in e2e: " + str(pytorch_total_params))

    reporter = model.reporter

    # check the use of multi-gpu; batch size is scaled so each GPU keeps
    # the requested per-device batch
    if args.ngpu > 1:
        model = torch.nn.DataParallel(model, device_ids=list(range(args.ngpu)))
        logging.info('batch size is automatically increased (%d -> %d)' % (
            args.batch_size, args.batch_size * args.ngpu))
        args.batch_size *= args.ngpu

    # set torch device
    device = torch.device("cuda" if args.ngpu > 0 else "cpu")
    model = model.to(device)

    # Setup an optimizer
    # NOTE(review): no else branch — an unknown args.opt leaves `optimizer`
    # unbound and fails below; argparse presumably restricts the choices
    if args.opt == 'adadelta':
        optimizer = torch.optim.Adadelta(
            model.parameters(), rho=0.95, eps=args.eps)
    elif args.opt == 'adam':
        optimizer = torch.optim.Adam(model.parameters())

    # FIXME: TOO DIRTY HACK — Chainer's Trainer expects optimizer.target and
    # optimizer.serialize; graft them onto the torch optimizer
    setattr(optimizer, "target", reporter)
    setattr(optimizer, "serialize", lambda s: reporter.serialize(s))

    # Setup a converter (project-defined batch -> tensor transform)
    converter = CustomConverter()

    # read json data
    with open(args.train_json, 'rb') as f:
        train_json = json.load(f)['utts']
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']

    # make minibatch list (variable length)
    train = make_batchset(train_json, args.batch_size,
                          args.maxlen_in, args.maxlen_out, args.minibatches,
                          min_batch_size=args.ngpu if args.ngpu > 1 else 1)
    valid = make_batchset(valid_json, args.batch_size,
                          args.maxlen_in, args.maxlen_out, args.minibatches,
                          min_batch_size=args.ngpu if args.ngpu > 1 else 1)
    # hack to make batchsize argument as 1
    # actual batchsize is included in a list
    if args.n_iter_processes > 0:
        train_iter = chainer.iterators.MultiprocessIterator(
            TransformDataset(train, converter.transform),
            batch_size=1, n_processes=args.n_iter_processes,
            n_prefetch=8, maxtasksperchild=20)
        valid_iter = chainer.iterators.MultiprocessIterator(
            TransformDataset(valid, converter.transform),
            batch_size=1, repeat=False, shuffle=False,
            n_processes=args.n_iter_processes,
            n_prefetch=8, maxtasksperchild=20)
    else:
        train_iter = chainer.iterators.SerialIterator(TransformDataset(
            train, converter.transform), batch_size=1)
        valid_iter = chainer.iterators.SerialIterator(TransformDataset(
            valid, converter.transform),
            batch_size=1, repeat=False, shuffle=False)

    # Set up a trainer
    updater = CustomUpdater(model, args.grad_clip, train_iter,
                            optimizer, converter, device, args.ngpu)
    trainer = training.Trainer(
        updater, (args.epochs, 'epoch'), out=args.outdir)

    # Resume from a snapshot (done before extensions run)
    if args.resume:
        logging.info('resumed from %s' % args.resume)
        torch_resume(args.resume, trainer,
                     weight_sharing=args.weight_sharing)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(
        CustomEvaluator(model, valid_iter, reporter, converter, device))

    # Make a plot for training and validation values
    trainer.extend(
        extensions.PlotReport(['main/loss_adv', 'validation/main/loss_adv'],
                              'epoch', file_name='loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/acc_adv', 'validation/main/acc_adv'],
                              'epoch', file_name='acc.png'))

    # Save best models (lowest adversarial loss / highest adversarial acc)
    trainer.extend(
        extensions.snapshot_object(model, 'model.loss.best',
                                   savefun=torch_save),
        trigger=training.triggers.MinValueTrigger('validation/main/loss_adv'))
    trainer.extend(
        extensions.snapshot_object(model, 'model.acc.best',
                                   savefun=torch_save),
        trigger=training.triggers.MaxValueTrigger('validation/main/acc_adv'))

    # save snapshot which contains model and optimizer states
    trainer.extend(torch_snapshot(), trigger=(1, 'epoch'))

    # epsilon decay in the optimizer: when the monitored metric worsens,
    # restore the best checkpoint and shrink Adadelta's eps
    if args.opt == 'adadelta':
        if args.criterion == 'acc':
            trainer.extend(restore_snapshot(model,
                                            args.outdir + '/model.acc.best',
                                            load_fn=torch_load),
                           trigger=CompareValueTrigger(
                               'validation/main/acc_adv',
                               lambda best_value, current_value:
                               best_value > current_value))
            trainer.extend(adadelta_eps_decay(args.eps_decay),
                           trigger=CompareValueTrigger(
                               'validation/main/acc_adv',
                               lambda best_value, current_value:
                               best_value > current_value))
        elif args.criterion == 'loss':
            trainer.extend(restore_snapshot(model,
                                            args.outdir + '/model.loss.best',
                                            load_fn=torch_load),
                           trigger=CompareValueTrigger(
                               'validation/main/loss_adv',
                               lambda best_value, current_value:
                               best_value < current_value))
            trainer.extend(adadelta_eps_decay(args.eps_decay),
                           trigger=CompareValueTrigger(
                               'validation/main/loss_adv',
                               lambda best_value, current_value:
                               best_value < current_value))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(
        extensions.LogReport(trigger=(REPORT_INTERVAL, 'iteration')))
    report_keys = ['epoch', 'iteration', 'elapsed_time']
    if args.opt == 'adadelta':
        # expose the current Adadelta eps in the report
        trainer.extend(extensions.observe_value(
            'eps', lambda trainer: trainer.updater.get_optimizer('main').
            param_groups[0]["eps"]),
            trigger=(REPORT_INTERVAL, 'iteration'))
        report_keys.append('eps')
    report_keys.extend([
        'main/loss_adv', 'main/acc_adv',
        'validation/main/loss_adv', 'validation/main/acc_adv'
    ])
    trainer.extend(extensions.PrintReport(report_keys),
                   trigger=(REPORT_INTERVAL, 'iteration'))
    trainer.extend(extensions.ProgressBar(update_interval=REPORT_INTERVAL))

    # Run the training
    trainer.run()
def main():
    """Train the MSCOCO image-caption model.

    Parses command-line options, prepares the (preprocessed) MSCOCO train
    and validation sets, and runs a Chainer Trainer with evaluation,
    logging, plotting and model-snapshot extensions.

    Fixes vs. previous revision:
    - uses ``training.updaters.StandardUpdater`` (the non-deprecated module
      path, consistent with the other training scripts in this file);
    - corrected typos in user-facing help strings ('MSOCO', 'Maxium').
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--out', type=str, default='result',
                        help='Output directory')
    parser.add_argument('--mscoco-root', type=str, default='data',
                        help='MSCOCO dataset root directory')
    parser.add_argument('--max-iters', type=int, default=50000,
                        help='Maximum number of iterations to train')
    parser.add_argument('--batch-size', type=int, default=128,
                        help='Minibatch size')
    parser.add_argument('--dropout-ratio', type=float, default=0.5,
                        help='Language model dropout ratio')
    parser.add_argument('--val-keep-quantity', type=int, default=100,
                        help='Keep every N-th validation image')
    parser.add_argument('--val-iter', type=int, default=100,
                        help='Run validation every N-th iteration')
    parser.add_argument('--log-iter', type=int, default=1,
                        help='Log every N-th iteration')
    parser.add_argument('--snapshot-iter', type=int, default=1000,
                        help='Model snapshot every N-th iteration')
    parser.add_argument('--rnn', type=str, default='nsteplstm',
                        choices=['nsteplstm', 'lstm'],
                        help='Language model layer type')
    parser.add_argument('--gpu', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--max-caption-length', type=int, default=30,
                        help='Maximum caption length when using LSTM layer')
    args = parser.parse_args()

    # Load the MSCOCO dataset. Assumes that the dataset has been downloaded
    # already using e.g. the `download.py` script
    train, val = datasets.get_mscoco(args.mscoco_root)

    # Validation samples are used to address overfitting and see how well your
    # model generalizes to yet unseen data. However, since the number of these
    # samples in MSCOCO is quite large (~200k) and thus require time to
    # evaluate, you may choose to use only a fraction of the available samples
    val = val[::args.val_keep_quantity]

    # Number of unique words that are found in the dataset
    vocab_size = len(train.vocab)

    # Instantiate the model to be trained either with LSTM layers or with
    # NStepLSTM layers
    model = ImageCaptionModel(
        vocab_size, dropout_ratio=args.dropout_ratio, rnn=args.rnn)

    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    def transform(in_data):
        # Called for each sample and applies necessary preprocessing to the
        # image such as resizing and normalizing
        img, caption = in_data
        img = model.prepare(img)
        return img, caption

    # We need to preprocess the images since their sizes may vary (and the
    # model requires that they have the exact same fixed size)
    train = TransformDataset(train, transform)
    val = TransformDataset(val, transform)

    train_iter = iterators.MultiprocessIterator(
        train, args.batch_size, shared_mem=700000)
    val_iter = chainer.iterators.MultiprocessIterator(
        val, args.batch_size, repeat=False, shuffle=False, shared_mem=700000)

    optimizer = optimizers.Adam()
    optimizer.setup(model)

    def converter(batch, device):
        # The converter receives a batch of input samples and may modify it if
        # necessary. In our case, we need to align the captions depending on
        # if we are using LSTM layers or NStepLSTM layers in the model.
        if args.rnn == 'lstm':
            max_caption_length = args.max_caption_length
        elif args.rnn == 'nsteplstm':
            max_caption_length = None
        else:
            raise ValueError('Invalid RNN type.')
        return datasets.converter(
            batch, device, max_caption_length=max_caption_length)

    # `training.updaters` is the current module path (the bare
    # `training.updater` module is kept only for backward compatibility)
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer=optimizer, device=args.gpu, converter=converter)
    trainer = training.Trainer(
        updater, out=args.out, stop_trigger=(args.max_iters, 'iteration'))
    trainer.extend(
        extensions.Evaluator(
            val_iter,
            target=model,
            converter=converter,
            device=args.gpu
        ),
        trigger=(args.val_iter, 'iteration')
    )
    trainer.extend(
        extensions.LogReport(
            ['main/loss', 'validation/main/loss'],
            trigger=(args.log_iter, 'iteration')
        )
    )
    trainer.extend(
        extensions.PlotReport(
            ['main/loss', 'validation/main/loss'],
            trigger=(args.log_iter, 'iteration')
        )
    )
    trainer.extend(
        extensions.PrintReport(
            ['elapsed_time', 'epoch', 'iteration', 'main/loss',
             'validation/main/loss']
        ),
        trigger=(args.log_iter, 'iteration')
    )

    # Save model snapshots so that later on, we can load them and generate new
    # captions for any image. This can be done in the `predict.py` script
    trainer.extend(
        extensions.snapshot_object(model, 'model_{.updater.iteration}'),
        trigger=(args.snapshot_iter, 'iteration')
    )
    trainer.extend(extensions.ProgressBar())
    trainer.run()
def main():
    """Train a progressive-growing GAN.

    Parses command-line options, builds generator / smoothed generator /
    discriminator at the channel schedule implied by ``--resize``, and runs
    the progressive training loop with snapshot and sampling extensions.

    Fix vs. previous revision: an unsupported ``--resize`` now raises a
    descriptive ``ValueError`` (subclass of the previous bare ``Exception``)
    instead of an empty ``Exception()``.
    """
    parser = argparse.ArgumentParser(description='Train script')
    parser.add_argument('dataset_directory')
    parser.add_argument('--resize', type=int, default=32)
    parser.add_argument('--batchsize', '-b', type=int, default=16)
    parser.add_argument('--max_iter', '-m', type=int, default=4000000)
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default="result",
                        help='Directory to output the result')
    parser.add_argument('--snapshot_interval', type=int, default=5000,
                        help='Interval of snapshot')
    parser.add_argument('--evaluation_interval', type=int, default=50000,
                        help='Interval of evaluation')
    parser.add_argument('--out_image_interval', type=int, default=5000,
                        help='Interval of evaluation')
    parser.add_argument('--stage_interval', type=int, default=400000,
                        help='Interval of stage progress')
    parser.add_argument('--display_interval', type=int, default=100,
                        help='Interval of displaying log to console')
    parser.add_argument(
        '--n_dis', type=int, default=1,
        help='number of discriminator update per generator update')
    parser.add_argument('--lam', type=float, default=10,
                        help='gradient penalty')
    parser.add_argument('--gamma', type=float, default=750,
                        help='gradient penalty')
    parser.add_argument('--pooling_comp', type=float, default=1.0,
                        help='compensation')
    parser.add_argument('--pretrained_generator', type=str, default="")
    parser.add_argument('--pretrained_discriminator', type=str, default="")
    parser.add_argument('--initial_stage', type=float, default=0.0)
    parser.add_argument('--generator_smoothing', type=float, default=0.999)
    args = parser.parse_args()

    # Unique, self-describing output directory per run
    result_directory_name = "_".join([
        "resize{}".format(args.resize),
        "stage{}".format(args.initial_stage),
        "batch{}".format(args.batchsize),
        "stginterval{}".format(args.stage_interval),
        str(int(time.time())),
    ])
    result_directory = os.path.join(args.out, result_directory_name)
    record_setting(result_directory)
    check_chainer_version()

    report_keys = [
        "stage", "loss_dis", "loss_gp", "loss_gen", "g",
        "inception_mean", "inception_std", "FID"
    ]
    max_iter = args.max_iter

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()

    # Per-stage channel counts; length determines the number of growth stages
    if args.resize == 32:
        channel_evolution = (512, 512, 512, 256)
    elif args.resize == 128:
        channel_evolution = (512, 512, 512, 512, 256, 128)
    elif args.resize == 256:
        channel_evolution = (512, 512, 512, 512, 256, 128, 64)
        # too much memory
        # channel_evolution = (512, 512, 512, 256, 128, 64, 32)
    elif args.resize == 512:
        channel_evolution = (512, 512, 512, 512, 256, 128, 64, 32)
    elif args.resize == 1024:
        channel_evolution = (512, 512, 512, 512, 256, 128, 64, 32, 16)
    else:
        # Descriptive error instead of a bare `raise Exception()`
        raise ValueError(
            'unsupported --resize value: {} '
            '(expected one of 32, 128, 256, 512, 1024)'.format(args.resize))

    generator = \
        chainer_progressive_gan.models.progressive_generator.ProgressiveGenerator(
            channel_evolution=channel_evolution)
    # Exponentially-smoothed copy of the generator used for sampling
    generator_smooth = \
        chainer_progressive_gan.models.progressive_generator.ProgressiveGenerator(
            channel_evolution=channel_evolution)
    discriminator = \
        chainer_progressive_gan.models.progressive_discriminator.ProgressiveDiscriminator(
            pooling_comp=args.pooling_comp,
            channel_evolution=channel_evolution)

    # select GPU
    if args.gpu >= 0:
        generator.to_gpu()
        generator_smooth.to_gpu()
        discriminator.to_gpu()
        print("use gpu {}".format(args.gpu))

    if args.pretrained_generator != "":
        chainer.serializers.load_npz(args.pretrained_generator, generator)
    if args.pretrained_discriminator != "":
        chainer.serializers.load_npz(
            args.pretrained_discriminator, discriminator)
    # Start the smoothed generator from the (possibly pretrained) weights
    copy_param(generator_smooth, generator)

    opt_gen = make_optimizer(generator)
    opt_dis = make_optimizer(discriminator)

    if args.dataset_directory == 'cifar10':
        import chainer_gan_lib.common.dataset
        train_dataset = chainer_gan_lib.common.dataset.Cifar10Dataset()
    else:
        dataset_pathes = list(glob.glob("{}/*".format(args.dataset_directory)))
        print("use {} files".format(len(dataset_pathes)))
        train_dataset = datasets.ResizedImageDataset(
            dataset_pathes, resize=(args.resize, args.resize))
    train_iter = chainer.iterators.SerialIterator(
        train_dataset, args.batchsize)

    # Set up a trainer
    updater = progressive_updater.ProgressiveUpdater(
        resolution=args.resize,
        models=(generator, discriminator, generator_smooth),
        iterator={'main': train_iter},
        optimizer={
            'opt_gen': opt_gen,
            'opt_dis': opt_dis
        },
        device=args.gpu,
        n_dis=args.n_dis,
        lam=args.lam,
        gamma=args.gamma,
        smoothing=args.generator_smoothing,
        initial_stage=args.initial_stage,
        stage_interval=args.stage_interval)
    trainer = training.Trainer(
        updater, (max_iter, 'iteration'), out=result_directory)
    trainer.extend(extensions.snapshot_object(
        generator, 'generator_{.updater.iteration}.npz'),
        trigger=(args.snapshot_interval, 'iteration'))
    trainer.extend(extensions.snapshot_object(
        generator_smooth, 'generator_smooth_{.updater.iteration}.npz'),
        trigger=(args.snapshot_interval, 'iteration'))
    trainer.extend(extensions.snapshot_object(
        discriminator, 'discriminator_{.updater.iteration}.npz'),
        trigger=(args.snapshot_interval, 'iteration'))
    trainer.extend(
        extensions.LogReport(keys=report_keys,
                             trigger=(args.display_interval, 'iteration')))
    trainer.extend(extensions.PrintReport(report_keys),
                   trigger=(args.display_interval, 'iteration'))
    # Sample images are drawn from the smoothed generator
    trainer.extend(sample_generate(generator_smooth, result_directory),
                   trigger=(args.out_image_interval, 'iteration'),
                   priority=extension.PRIORITY_WRITER)
    trainer.extend(sample_generate_light(generator_smooth, result_directory),
                   trigger=(args.evaluation_interval // 10, 'iteration'),
                   priority=extension.PRIORITY_WRITER)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # Run the training
    trainer.run()
def main():
    """Train an ImageNet (ILSVRC2012) classifier.

    Selects a network architecture by name, loads the image-label lists and
    mean file, and runs a standard Chainer Trainer with evaluation, logging
    and snapshot extensions.

    Fix vs. previous revision: the ``--gpu`` help string was missing its
    closing parenthesis ('...indicates CPU').
    """
    archs = {
        'alex': alex.Alex,
        'alex_fp16': alex.AlexFp16,
        'googlenet': googlenet.GoogLeNet,
        'googlenetbn': googlenetbn.GoogLeNetBN,
        'googlenetbn_fp16': googlenetbn.GoogLeNetBNFp16,
        'nin': nin.NIN,
        'resnet50': resnet50.ResNet50,
        'resnext50': resnet50.ResNeXt50,
    }

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to training image-label list file')
    parser.add_argument('val', help='Path to validation image-label list file')
    parser.add_argument('--arch', '-a', choices=archs.keys(), default='nin',
                        help='Convnet architecture')
    parser.add_argument('--batchsize', '-B', type=int, default=32,
                        help='Learning minibatch size')
    parser.add_argument('--epoch', '-E', type=int, default=10,
                        help='Number of epochs to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--initmodel',
                        help='Initialize the model from given file')
    parser.add_argument('--loaderjob', '-j', type=int,
                        help='Number of parallel data loading processes')
    parser.add_argument('--mean', '-m', default='mean.npy',
                        help='Mean file (computed by compute_mean.py)')
    parser.add_argument('--resume', '-r', default='',
                        help='Initialize the trainer from given file')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--root', '-R', default='.',
                        help='Root directory path of image files')
    parser.add_argument('--val_batchsize', '-b', type=int, default=250,
                        help='Validation minibatch size')
    parser.add_argument('--test', action='store_true')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    # Initialize the model to train
    model = archs[args.arch]()
    if args.initmodel:
        print('Load model from {}'.format(args.initmodel))
        chainer.serializers.load_npz(args.initmodel, model)
    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(
            args.gpu).use()  # Make the GPU current
        model.to_gpu()

    # Load the datasets and mean file
    mean = np.load(args.mean)
    train = PreprocessedDataset(args.train, args.root, mean, model.insize)
    val = PreprocessedDataset(args.val, args.root, mean, model.insize, False)
    # These iterators load the images with subprocesses running in parallel
    # to the training/validation.
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, n_processes=args.loaderjob)
    val_iter = chainer.iterators.MultiprocessIterator(
        val, args.val_batchsize, repeat=False, n_processes=args.loaderjob)

    # Set up an optimizer
    optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer.setup(model)

    # Set up a trainer
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    # In --test mode everything fires every iteration for a quick smoke run
    val_interval = (1 if args.test else 100000), 'iteration'
    log_interval = (1 if args.test else 1000), 'iteration'

    trainer.extend(extensions.Evaluator(val_iter, model, device=args.gpu),
                   trigger=val_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    # Be careful to pass the interval directly to LogReport
    # (it determines when to emit log rather than when to read observations)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'lr'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Train a DCGAN on CIFAR-10 or on a user-supplied image directory.

    Sets up generator/discriminator, their Adam optimizers with weight
    decay, and a Chainer Trainer with snapshot, logging and image-sampling
    extensions. Supports ChainerX-style device specifiers via --device.
    """
    parser = argparse.ArgumentParser(description='Chainer example: DCGAN')
    parser.add_argument('--batchsize', '-b', type=int, default=50,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=1000,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--device', '-d', type=str, default='-1',
                        help='Device specifier. Either ChainerX device '
                        'specifier or an integer. If non-negative integer, '
                        'CuPy arrays with specified device id are used. If '
                        'negative integer, NumPy arrays are used')
    parser.add_argument('--dataset', '-i', default='',
                        help='Directory of image files. Default is cifar-10.')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', type=str,
                        help='Resume the training from snapshot')
    parser.add_argument('--n_hidden', '-n', type=int, default=100,
                        help='Number of hidden units (z)')
    parser.add_argument('--seed', type=int, default=0,
                        help='Random seed of z at visualization stage')
    parser.add_argument('--snapshot_interval', type=int, default=1000,
                        help='Interval of snapshot')
    parser.add_argument('--display_interval', type=int, default=100,
                        help='Interval of displaying log to console')
    # --gpu is kept for backward compatibility; it writes into args.device
    group = parser.add_argument_group('deprecated arguments')
    group.add_argument('--gpu', '-g', dest='device',
                       type=int, nargs='?', const=0,
                       help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    device = chainer.get_device(args.device)
    device.use()

    print('Device: {}'.format(device))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# n_hidden: {}'.format(args.n_hidden))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train
    gen = Generator(n_hidden=args.n_hidden)
    dis = Discriminator()

    gen.to_device(device)  # Copy the model to the device
    dis.to_device(device)

    # Setup an optimizer: Adam with a small weight-decay hook on each model
    def make_optimizer(model, alpha=0.0002, beta1=0.5):
        optimizer = chainer.optimizers.Adam(alpha=alpha, beta1=beta1)
        optimizer.setup(model)
        optimizer.add_hook(
            chainer.optimizer_hooks.WeightDecay(0.0001), 'hook_dec')
        return optimizer

    opt_gen = make_optimizer(gen)
    opt_dis = make_optimizer(dis)

    if args.dataset == '':
        # Load the CIFAR10 dataset if args.dataset is not specified
        train, _ = chainer.datasets.get_cifar10(withlabel=False, scale=255.)
    else:
        # Any file whose name contains 'png' or 'jpg' is treated as an image
        all_files = os.listdir(args.dataset)
        image_files = [f for f in all_files if ('png' in f or 'jpg' in f)]
        print('{} contains {} image files'
              .format(args.dataset, len(image_files)))
        train = chainer.datasets\
            .ImageDataset(paths=image_files, root=args.dataset)

    # Setup an iterator
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)

    # Setup an updater
    updater = DCGANUpdater(
        models=(gen, dis),
        iterator=train_iter,
        optimizer={
            'gen': opt_gen, 'dis': opt_dis},
        device=device)

    # Setup a trainer
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    snapshot_interval = (args.snapshot_interval, 'iteration')
    display_interval = (args.display_interval, 'iteration')
    trainer.extend(
        extensions.snapshot(filename='snapshot_iter_{.updater.iteration}.npz'),
        trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        gen, 'gen_iter_{.updater.iteration}.npz'), trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        dis, 'dis_iter_{.updater.iteration}.npz'), trigger=snapshot_interval)
    trainer.extend(extensions.LogReport(trigger=display_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'gen/loss', 'dis/loss',
    ]), trigger=display_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    # Periodically write a grid of generated samples for visual inspection
    trainer.extend(
        out_generated_image(
            gen, dis,
            10, 10, args.seed, args.out),
        trigger=snapshot_interval)

    if args.resume is not None:
        # Resume from a snapshot (loaded before run(), as required)
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
def train(args):
    """Train the phrase-grounding model.

    Builds a timestamped checkpoint directory, dumps the run settings,
    sets up dataset/model/optimizer and runs a Chainer Trainer. In
    ``--san_check`` (sanity-check) mode, intervals are shortened and no
    snapshots are written.

    Fix vs. previous revision: the settings.json file handle was opened
    inline in ``json.dump(...)`` and never closed; it is now managed with
    a ``with`` block.
    """
    san_check = args.san_check
    epoch = args.epoch
    lr = args.lr
    b_size = args.b_size
    device = args.device
    w_decay = args.w_decay
    image_net = args.image_net
    phrase_net = args.phrase_net
    preload = args.preload
    wo_image = (image_net is None)

    # Timestamped checkpoint directory; 'sc_' prefix marks sanity-check runs
    out_base = 'checkpoint/'
    time_stamp = dt.now().strftime("%Y%m%d-%H%M%S")
    saveto = out_base + '{}{}-{}_{}/'.format(
        'sc_' * san_check, phrase_net, image_net, time_stamp)
    os.makedirs(saveto)
    # Close the settings file deterministically (was a leaked handle)
    with open(saveto + 'settings.json', 'w') as f:
        json.dump(vars(args), f)

    print('setup dataset...')
    train, conv_f = get_dataset(phrase_net, image_net=image_net,
                                split='train', preload=preload,
                                san_check=args.san_check)
    val, _ = get_dataset(phrase_net, image_net=image_net, split='val',
                         skip=10*4, preload=preload,
                         san_check=args.san_check)
    train_iter = SampleManager(train, b_size, p_batch_ratio=.15)
    val_iter = SerialIterator(val, b_size, shuffle=False, repeat=False)

    print('setup a model ...')
    chainer.cuda.get_device_from_id(device).use()
    model = setup_model(phrase_net, image_net)
    model.to_gpu()

    optimizer = chainer.optimizers.Adam(alpha=lr)
    optimizer.setup(model)
    if hasattr(model, 'vis_cnn'):
        # This line protects vgg parameters from weight decay.
        model.vis_cnn.disable_update()
    if w_decay:
        optimizer.add_hook(
            chainer.optimizer.WeightDecay(w_decay), 'hook_dec')

    updater = training.StandardUpdater(
        train_iter, optimizer, converter=conv_f, device=device)
    trainer = training.Trainer(updater, (epoch, 'epoch'), saveto)

    # Short intervals in sanity-check mode so everything fires quickly
    val_interval = (1, 'epoch') if san_check else (1000, 'iteration')
    log_interval = (1, 'iteration') if san_check else (10, 'iteration')
    plot_interval = (1, 'iteration') if san_check else (10, 'iteration')
    dataset_interval = (1, 'iteration') if san_check else (1000, 'iteration')

    trainer.extend(extensions.Evaluator(
        val_iter, model, converter=conv_f, device=device),
        trigger=val_interval)
    if not san_check:
        trainer.extend(extensions.ExponentialShift(
            'alpha', 0.5), trigger=(1, 'epoch'))

    # # Comment out to enable visualization of a computational graph.
    # trainer.extend(extensions.dump_graph('main/loss'))
    if not san_check:
        # Comment out next line to save a checkpoint at each epoch, which
        # enables you to restart the training loop from the saved point.
        # Note that saving a checkpoint may cost a few minutes.
        trainer.extend(extensions.snapshot(), trigger=(1, 'epoch'))
        trainer.extend(extensions.snapshot_object(
            model, 'model_{.updater.iteration}'), trigger=val_interval)
        # Keep the best model according to validation F1
        trainer.extend(extensions.snapshot_object(
            model, 'model'),
            trigger=training.triggers.MaxValueTrigger(
                'validation/main/f1', trigger=val_interval))

    # NOTE(review): the collapsed source is ambiguous about whether the
    # logging extensions below belong inside the `if not san_check:` block;
    # they are kept at function level so sanity-check runs are also logged.
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/f1', 'validation/main/f1', 'pr', 'lr'
    ]), trigger=log_interval)
    trainer.extend(extensions.PlotReport(
        ['main/loss', 'validation/main/loss'],
        file_name='loss.png', trigger=plot_interval))
    trainer.extend(extensions.PlotReport(
        ['main/f1', 'validation/main/f1'],
        file_name='f1.png', trigger=plot_interval))
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    print('start training')
    trainer.run()

    chainer.serializers.save_npz(saveto + 'final_model', model)
def main():
    """Train a pix2pix model on the facade dataset.

    Builds encoder/decoder/discriminator, their Adam optimizers, and runs
    a Chainer Trainer with snapshot, logging and image-output extensions.
    """
    parser = argparse.ArgumentParser(
        description='chainer implementation of pix2pix')
    parser.add_argument('--batchsize', '-b', type=int, default=1,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=200,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--dataset', '-i', default='./facade/base',
                        help='Directory of image files.')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--seed', type=int, default=0,
                        help='Random seed')
    parser.add_argument('--snapshot_interval', type=int, default=1000,
                        help='Interval of snapshot')
    parser.add_argument('--display_interval', type=int, default=100,
                        help='Interval of displaying log to console')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train
    # 12 input channels: presumably image + one-hot label planes of the
    # facade data — TODO confirm against FacadeDataset
    enc = Encoder(in_ch=12)
    dec = Decoder(out_ch=3)
    dis = Discriminator(in_ch=12, out_ch=3)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        enc.to_gpu()  # Copy the model to the GPU
        dec.to_gpu()
        dis.to_gpu()

    # Setup an optimizer: Adam with a small weight-decay hook per model
    def make_optimizer(model, alpha=0.0002, beta1=0.5):
        optimizer = chainer.optimizers.Adam(alpha=alpha, beta1=beta1)
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer.WeightDecay(0.00001), 'hook_dec')
        return optimizer
    opt_enc = make_optimizer(enc)
    opt_dec = make_optimizer(dec)
    opt_dis = make_optimizer(dis)

    # Fixed train/test split by index range of the facade images
    train_d = FacadeDataset(args.dataset, data_range=(1, 300))
    test_d = FacadeDataset(args.dataset, data_range=(300, 379))
    # train_iter = chainer.iterators.MultiprocessIterator(
    #     train_d, args.batchsize, n_processes=4)
    # test_iter = chainer.iterators.MultiprocessIterator(
    #     test_d, args.batchsize, n_processes=4)
    train_iter = chainer.iterators.SerialIterator(train_d, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test_d, args.batchsize)

    # Set up a trainer
    updater = FacadeUpdater(
        models=(enc, dec, dis),
        iterator={
            'main': train_iter,
            'test': test_iter},
        optimizer={
            'enc': opt_enc, 'dec': opt_dec,
            'dis': opt_dis},
        device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    snapshot_interval = (args.snapshot_interval, 'iteration')
    display_interval = (args.display_interval, 'iteration')
    trainer.extend(extensions.snapshot(
        filename='snapshot_iter_{.updater.iteration}.npz'),
        trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        enc, 'enc_iter_{.updater.iteration}.npz'), trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        dec, 'dec_iter_{.updater.iteration}.npz'), trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        dis, 'dis_iter_{.updater.iteration}.npz'), trigger=snapshot_interval)
    trainer.extend(extensions.LogReport(trigger=display_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration',
        'enc/loss', 'dec/loss', 'dis/loss',
    ]), trigger=display_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    # Periodically write a grid of translated images for visual inspection
    trainer.extend(
        out_image(
            updater, enc, dec,
            5, 5, args.seed, args.out),
        trigger=snapshot_interval)

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
def main():
    """Train the line-drawing colorization/simplification generator.

    Fixes over the previous revision:
    * ``--resume`` snapshot is now loaded *before* ``trainer.run()`` —
      previously it was loaded after training finished, so resuming never
      had any effect.
    * The final ``save_npz`` calls used an undefined name ``out`` (NameError
      after a full training run); they now use ``args.out``, the directory
      the trainer itself writes to.
    """
    parser = argparse.ArgumentParser(
        description='chainer line drawing colorization')
    parser.add_argument('--batchsize', '-b', type=int, default=16,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=500,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--dataset', '-i', default='./images/',
                        help='Directory of image files.')
    parser.add_argument('--out', '-o', default='./result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--seed', type=int, default=0,
                        help='Random seed')
    parser.add_argument('--snapshot_interval', type=int, default=1000,
                        help='Interval of snapshot')
    parser.add_argument('--display_interval', type=int, default=10,
                        help='Interval of displaying log to console')
    parser.add_argument('--test_visual_interval', type=int, default=1000,
                        help='Interval of drawing test images')
    parser.add_argument('--test_out', default='./test_result/',
                        help='DIrectory to output test samples')
    parser.add_argument('--test_image_path', default='./test_samples/test_sample3/',
                        help='Directory of image files for testing')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    root = args.dataset
    gen = generator.GEN()
    #serializers.load_npz("result_cnn/gen_iter_2000", gen)
    #print('generator loaded')

    # Paired rough/line/note dataset; 328 is the training crop size.
    dataset = Rough2LineDatasetNote(
        "dat/paired_dataset.dat", root + "rough/", root + "line/",
        root + "note", train=True, size=328)
    train_iter = chainer.iterators.SerialIterator(dataset, args.batchsize)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        gen.to_gpu()  # Copy the model to the GPU

    # Setup optimizer parameters.
    opt = optimizers.Adam(alpha=0.0001)
    opt.setup(gen)
    opt.add_hook(chainer.optimizer.WeightDecay(1e-5), 'hook_gen')

    # Set up a trainer
    updater = cnnUpdater(
        models=(gen),
        iterator={
            'main': train_iter,
            #'test': test_iter
        },
        optimizer={
            'gen': opt,},
        device=args.gpu)

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    snapshot_interval = (args.snapshot_interval, 'iteration')
    snapshot_interval2 = (args.snapshot_interval * 2, 'iteration')
    trainer.extend(extensions.dump_graph('gen/loss'))
    trainer.extend(extensions.snapshot(), trigger=snapshot_interval2)
    trainer.extend(extensions.snapshot_object(
        gen, 'gen_iter_{.updater.iteration}'), trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        opt, 'optimizer_'), trigger=snapshot_interval)
    trainer.extend(extensions.LogReport(trigger=(10, 'iteration'), ))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'gen/loss', 'gen/loss_L']))
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(test_samples_simplification(updater, gen, args.test_out,
                                               args.test_image_path),
                   trigger=(args.test_visual_interval, 'iteration'))

    if args.resume:
        # Resume from a snapshot — must happen before run(), otherwise the
        # restored state is never used.
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()

    # Save the trained model and optimizer state into the result directory.
    chainer.serializers.save_npz(os.path.join(args.out, 'model_final'), gen)
    chainer.serializers.save_npz(os.path.join(args.out, 'optimizer_final'), opt)
def main():
    """Train an LSTM language model on the Penn Tree Bank word corpus.

    Uses truncated BPTT of length ``--bproplen``, evaluates perplexity on the
    validation set during training, and reports test perplexity at the end.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=20,
                        help='Number of examples in each mini batch')
    parser.add_argument('--bproplen', '-l', type=int, default=35,
                        help='Number of words in each mini batch '
                        '(= length of truncated BPTT)')
    parser.add_argument('--epoch', '-e', type=int, default=39,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--gradclip', '-c', type=float, default=5,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.set_defaults(test=False)
    parser.add_argument('--unit', '-u', type=int, default=650,
                        help='Number of LSTM units in each layer')
    args = parser.parse_args()

    # Load the Penn Tree Bank long word sequence dataset
    train, val, test = chainer.datasets.get_ptb_words()
    n_vocab = max(train) + 1  # train is just an array of integers
    print('#vocab =', n_vocab)

    if args.test:
        # Tiny slices for a smoke-test run.
        train = train[:100]
        val = val[:100]
        test = test[:100]

    train_iter = ParallelSequentialIterator(train, args.batchsize)
    val_iter = ParallelSequentialIterator(val, 1, repeat=False)
    test_iter = ParallelSequentialIterator(test, 1, repeat=False)

    # Prepare an RNNLM model
    rnn = RNNForLM(n_vocab, args.unit)
    model = L.Classifier(rnn)
    model.compute_accuracy = False  # we only want the perplexity
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # make the GPU current
        model.to_gpu()

    # Set up an optimizer
    optimizer = chainer.optimizers.SGD(lr=1.0)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip))

    # Set up a trainer
    updater = BPTTUpdater(train_iter, optimizer, args.bproplen, args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    eval_model = model.copy()  # Model with shared params and distinct states
    eval_rnn = eval_model.predictor
    eval_rnn.train = False
    trainer.extend(extensions.Evaluator(
        val_iter, eval_model, device=args.gpu,
        # Reset the RNN state at the beginning of each evaluation
        eval_hook=lambda _: eval_rnn.reset_state()))

    interval = 10 if args.test else 500
    trainer.extend(extensions.LogReport(postprocess=compute_perplexity,
                                        trigger=(interval, 'iteration')))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'perplexity', 'val_perplexity']
    ), trigger=(interval, 'iteration'))
    trainer.extend(extensions.ProgressBar(
        update_interval=1 if args.test else 10))
    trainer.extend(extensions.snapshot())
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'))
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()

    # Evaluate the final model
    print('test')
    eval_rnn.reset_state()
    evaluator = extensions.Evaluator(test_iter, eval_model, device=args.gpu)
    result = evaluator()
    # Perplexity is exp of the mean cross-entropy loss.
    print('test perplexity:', np.exp(float(result['main/loss'])))
def main():
    """Train a DCGAN on MNIST or CIFAR-10, selected by ``--dataset``."""
    parser = argparse.ArgumentParser(description="DCGAN")
    parser.add_argument("--batchsize", "-b", type=int, default=128)
    parser.add_argument("--epoch", "-e", type=int, default=100)
    parser.add_argument("--gpu", "-g", type=int, default=0)
    parser.add_argument("--snapshot_interval", "-s", type=int, default=10)
    parser.add_argument("--display_interval", "-d", type=int, default=1)
    parser.add_argument("--n_dimz", "-z", type=int, default=100)
    parser.add_argument("--dataset", "-ds", type=str, default="cifar10")
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--out", "-o", type=str, default="result")
    parser.add_argument("--resume", '-r', default='')
    args = parser.parse_args()

    #import .py
    # Local imports deferred so the network module matches the chosen dataset.
    import Updater
    import Visualize
    if args.dataset == "mnist":
        import Network.mnist_net as Network
    else:
        import Network.cifar10_net as Network

    #print settings
    print("GPU:{}".format(args.gpu))
    print("epoch:{}".format(args.epoch))
    print("Minibatch_size:{}".format(args.batchsize))
    print("Dataset:{}".format(args.dataset))
    print('')

    out = os.path.join(args.out, args.dataset)

    #Set up NN
    gen = Network.Generator(n_hidden=args.n_dimz)
    dis = Network.Discriminator()

    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        gen.to_gpu()
        dis.to_gpu()

    #Make optimizer
    def make_optimizer(model, alpha=0.0002, beta1=0.5):
        optimizer = optimizers.Adam(alpha=alpha, beta1=beta1)
        #init_lr = alpha
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(0.0001), 'hook_dec')
        return optimizer
    opt_gen = make_optimizer(gen)
    opt_dis = make_optimizer(dis)

    #Get dataset
    if args.dataset == "mnist":
        train, _ = mnist.get_mnist(withlabel=False, ndim=3, scale=255.)
    else:
        train, _ = chainer.datasets.get_cifar10(withlabel=False, scale=255.)

    #Setup iterator
    train_iter = iterators.SerialIterator(train, args.batchsize)
    #Setup updater
    updater = Updater.DCGANUpdater(models=(gen, dis), iterator=train_iter,
                                   optimizer={
                                       'gen': opt_gen, 'dis': opt_dis
                                   },
                                   device=args.gpu)
    #Setup trainer
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=out)
    snapshot_interval = (args.snapshot_interval, 'epoch')
    display_interval = (args.display_interval, 'epoch')
    # NOTE(review): the trainer snapshot fires only once at the final epoch,
    # not at snapshot_interval like the model snapshots — confirm intended.
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}.npz'),
        trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        gen, 'gen_epoch_{.updater.epoch}.npz'), trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        dis, 'dis_epoch_{.updater.epoch}.npz'), trigger=snapshot_interval)
    trainer.extend(extensions.LogReport(trigger=display_interval))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'gen/loss', 'dis/loss', 'elapsed_time']),
        trigger=display_interval)
    trainer.extend(extensions.ProgressBar())
    # NOTE(review): visualization writes under args.out while the trainer
    # writes under out (= args.out/dataset) — confirm this split is intended.
    trainer.extend(Visualize.out_generated_image(gen, dis, 10, 10, args.seed,
                                                 args.out, args.dataset),
                   trigger=snapshot_interval)

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Train an ImageNet (ILSVRC2012) classifier with a selectable backbone.

    Supports ChainerX/CuPy/NumPy backends via ``--device`` and an optional
    NVIDIA DALI input pipeline via ``--dali``.
    """
    # Mapping from CLI architecture name to model class.
    archs = {
        'alex': alex.Alex,
        'alex_fp16': alex.AlexFp16,
        'googlenet': googlenet.GoogLeNet,
        'googlenetbn': googlenetbn.GoogLeNetBN,
        'googlenetbn_fp16': googlenetbn.GoogLeNetBNFp16,
        'nin': nin.NIN,
        'resnet50': resnet50.ResNet50,
        'resnext50': resnext50.ResNeXt50,
    }

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to training image-label list file')
    parser.add_argument('val', help='Path to validation image-label list file')
    parser.add_argument('--arch', '-a', choices=archs.keys(), default='nin',
                        help='Convnet architecture')
    parser.add_argument('--batchsize', '-B', type=int, default=32,
                        help='Learning minibatch size')
    parser.add_argument('--epoch', '-E', type=int, default=10,
                        help='Number of epochs to train')
    parser.add_argument('--device', '-d', type=str, default='-1',
                        help='Device specifier. Either ChainerX device '
                        'specifier or an integer. If non-negative integer, '
                        'CuPy arrays with specified device id are used. If '
                        'negative integer, NumPy arrays are used')
    parser.add_argument('--initmodel',
                        help='Initialize the model from given file')
    parser.add_argument('--loaderjob', '-j', type=int,
                        help='Number of parallel data loading processes')
    parser.add_argument('--mean', '-m', default='mean.npy',
                        help='Mean file (computed by compute_mean.py)')
    parser.add_argument('--resume', '-r', default='',
                        help='Initialize the trainer from given file')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--root', '-R', default='.',
                        help='Root directory path of image files')
    parser.add_argument('--val_batchsize', '-b', type=int, default=250,
                        help='Validation minibatch size')
    parser.add_argument('--test', action='store_true')
    parser.set_defaults(test=False)
    parser.add_argument('--dali', action='store_true')
    parser.set_defaults(dali=False)
    group = parser.add_argument_group('deprecated arguments')
    group.add_argument('--gpu', '-g', type=int, nargs='?', const=0,
                       help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    device = parse_device(args)

    print('Device: {}'.format(device))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Initialize the model to train
    model = archs[args.arch]()
    if args.initmodel:
        print('Load model from {}'.format(args.initmodel))
        chainer.serializers.load_npz(args.initmodel, model)
    model.to_device(device)
    device.use()

    # Load the mean file
    mean = np.load(args.mean)
    if args.dali:
        if not dali_util._dali_available:
            raise RuntimeError('DALI seems not available on your system.')
        num_threads = args.loaderjob
        if num_threads is None or num_threads <= 0:
            num_threads = 1
        # Per-channel mean/std for DALI's normalization.
        ch_mean = list(np.average(mean, axis=(1, 2)))
        ch_std = [255.0, 255.0, 255.0]
        # Setup DALI pipelines
        train_pipe = dali_util.DaliPipelineTrain(
            args.train, args.root, model.insize, args.batchsize,
            num_threads, args.gpu, True, mean=ch_mean, std=ch_std)
        val_pipe = dali_util.DaliPipelineVal(
            args.val, args.root, model.insize, args.val_batchsize,
            num_threads, args.gpu, False, mean=ch_mean, std=ch_std)
        train_iter = chainer.iterators.DaliIterator(train_pipe)
        val_iter = chainer.iterators.DaliIterator(val_pipe, repeat=False)
        # converter = dali_converter
        converter = dali_util.DaliConverter(mean=mean, crop_size=model.insize)
    else:
        # Load the dataset files
        train = PreprocessedDataset(args.train, args.root, mean, model.insize)
        val = PreprocessedDataset(args.val, args.root, mean, model.insize,
                                  False)
        # These iterators load the images with subprocesses running in parallel
        # to the training/validation.
        train_iter = chainer.iterators.MultiprocessIterator(
            train, args.batchsize, n_processes=args.loaderjob)
        val_iter = chainer.iterators.MultiprocessIterator(
            val, args.val_batchsize, repeat=False, n_processes=args.loaderjob)
        converter = dataset.concat_examples

    # Set up an optimizer
    optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer.setup(model)

    # Set up a trainer
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, converter=converter, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    val_interval = (1 if args.test else 100000), 'iteration'
    log_interval = (1 if args.test else 1000), 'iteration'

    trainer.extend(extensions.Evaluator(val_iter, model, converter=converter,
                                        device=device), trigger=val_interval)
    # TODO(sonots): Temporarily disabled for chainerx. Fix it.
    if not (chainerx.is_available() and isinstance(device, chainerx.Device)):
        trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    # Be careful to pass the interval directly to LogReport
    # (it determines when to emit log rather than when to read observations)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'lr'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Train an ImageNet (ILSVRC2012) classifier (GPU-id based variant).

    Builds the architecture selected by ``--arch``, preprocesses images with a
    precomputed mean file, and trains with MomentumSGD.
    """
    # Mapping from CLI architecture name to model class.
    archs = {
        'alex': alex.Alex,
        'alex_fp16': alex.AlexFp16,
        'googlenet': googlenet.GoogLeNet,
        'googlenetbn': googlenetbn.GoogLeNetBN,
        'googlenetbn_fp16': googlenetbn.GoogLeNetBNFp16,
        'nin': nin.NIN,
        'resnet50': resnet50.ResNet50
    }

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to training image-label list file')
    parser.add_argument('val', help='Path to validation image-label list file')
    parser.add_argument('--arch', '-a', choices=archs.keys(), default='nin',
                        help='Convnet architecture')
    parser.add_argument('--batchsize', '-B', type=int, default=32,
                        help='Learning minibatch size')
    parser.add_argument('--epoch', '-E', type=int, default=10,
                        help='Number of epochs to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU')
    parser.add_argument('--initmodel',
                        help='Initialize the model from given file')
    parser.add_argument('--loaderjob', '-j', type=int,
                        help='Number of parallel data loading processes')
    parser.add_argument('--mean', '-m', default='mean.npy',
                        help='Mean file (computed by compute_mean.py)')
    parser.add_argument('--resume', '-r', default='',
                        help='Initialize the trainer from given file')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--root', '-R', default='.',
                        help='Root directory path of image files')
    parser.add_argument('--val_batchsize', '-b', type=int, default=250,
                        help='Validation minibatch size')
    parser.add_argument('--test', action='store_true')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    # Initialize the model to train
    model = archs[args.arch]()
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()  # Make the GPU current
        model.to_gpu()

    # Load the datasets and mean file
    mean = np.load(args.mean)
    train = PreprocessedDataset(args.train, args.root, mean, model.insize)
    val = PreprocessedDataset(args.val, args.root, mean, model.insize, False)
    # These iterators load the images with subprocesses running in parallel to
    # the training/validation.
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, n_processes=args.loaderjob)
    val_iter = chainer.iterators.MultiprocessIterator(
        val, args.val_batchsize, repeat=False, n_processes=args.loaderjob)

    # Set up an optimizer
    optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer.setup(model)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    val_interval = (10 if args.test else 100000), 'iteration'
    log_interval = (10 if args.test else 1000), 'iteration'

    trainer.extend(extensions.Evaluator(val_iter, model, device=args.gpu),
                   trigger=val_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    # Be careful to pass the interval directly to LogReport
    # (it determines when to emit log rather than when to read observations)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'lr'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Train a MoleculeNet regressor/classifier and save the trained model.

    Loads (or downloads) the requested dataset split, builds a predictor for
    the chosen method, trains with Adam, reports per-dataset metrics, and
    keeps a snapshot of the best-validation model.

    Fix over the previous revision: the ``TypeError`` raised for an
    unsupported metric formatted ``type(metrics_fun)`` (the whole dict of
    metric functions) instead of the offending ``metric_fun`` entry, making
    the error message useless for diagnosis.
    """
    args = parse_arguments()

    # Set up some useful variables that will be used later on.
    dataset_name = args.dataset
    method = args.method
    num_data = args.num_data
    n_unit = args.unit_num
    conv_layers = args.conv_layers

    task_type = molnet_default_config[dataset_name]['task_type']
    model_filename = {'classification': 'classifier.pkl',
                      'regression': 'regressor.pkl'}

    print('Using dataset: {}...'.format(dataset_name))

    # Cache directory encodes dataset/method/labels so different runs don't
    # collide; class_num follows the label selection.
    if args.label:
        labels = args.label
        cache_dir = os.path.join('input', '{}_{}_{}'.format(dataset_name,
                                                            method, labels))
        class_num = len(labels) if isinstance(labels, list) else 1
    else:
        labels = None
        cache_dir = os.path.join('input', '{}_{}_all'.format(dataset_name,
                                                             method))
        class_num = len(molnet_default_config[args.dataset]['tasks'])

    # Load the train and validation parts of the dataset.
    filenames = [dataset_part_filename(p, num_data)
                 for p in ['train', 'valid']]
    paths = [os.path.join(cache_dir, f) for f in filenames]
    if all([os.path.exists(path) for path in paths]):
        dataset_parts = []
        for path in paths:
            print('Loading cached dataset from {}.'.format(path))
            dataset_parts.append(NumpyTupleDataset.load(path))
    else:
        dataset_parts = download_entire_dataset(dataset_name, num_data,
                                                labels, method, cache_dir)
    train, valid = dataset_parts[0], dataset_parts[1]

    # Set up the predictor.
    predictor = set_up_predictor(method, n_unit, conv_layers, class_num)

    # Set up the iterators.
    train_iter = iterators.SerialIterator(train, args.batchsize)
    valid_iter = iterators.SerialIterator(valid, args.batchsize, repeat=False,
                                          shuffle=False)

    # Load metrics for the current dataset.
    metrics = molnet_default_config[dataset_name]['metrics']
    metrics_fun = {k: v for k, v in metrics.items()
                   if isinstance(v, types.FunctionType)}
    loss_fun = molnet_default_config[dataset_name]['loss']

    if task_type == 'regression':
        model = Regressor(predictor, lossfun=loss_fun,
                          metrics_fun=metrics_fun, device=args.gpu)
        # TODO: Use standard scaler for regression task
    elif task_type == 'classification':
        model = Classifier(predictor, lossfun=loss_fun,
                           metrics_fun=metrics_fun, device=args.gpu)
    else:
        raise ValueError('Invalid task type ({}) encountered when processing '
                         'dataset ({}).'.format(task_type, dataset_name))

    # Set up the optimizer.
    optimizer = optimizers.Adam()
    optimizer.setup(model)

    # Save model-related output to this directory.
    model_dir = os.path.join(args.out, os.path.basename(cache_dir))
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    # Set up the updater.
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu,
                                       converter=concat_mols)

    # Set up the trainer.
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=model_dir)
    trainer.extend(E.Evaluator(valid_iter, model, device=args.gpu,
                               converter=concat_mols))
    trainer.extend(E.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(E.LogReport())

    # Report various metrics.
    print_report_targets = ['epoch', 'main/loss', 'validation/main/loss']
    for metric_name, metric_fun in metrics.items():
        if isinstance(metric_fun, types.FunctionType):
            print_report_targets.append('main/' + metric_name)
            print_report_targets.append('validation/main/' + metric_name)
        elif issubclass(metric_fun, BatchEvaluator):
            trainer.extend(metric_fun(valid_iter, model, device=args.gpu,
                                      eval_func=predictor,
                                      converter=concat_mols, name='val',
                                      raise_value_error=False))
            print_report_targets.append('val/main/' + metric_name)
        else:
            # FIX: report the offending entry, not the whole metrics dict.
            raise TypeError('{} is not a supported metrics function.'
                            .format(type(metric_fun)))
    print_report_targets.append('elapsed_time')

    # Augmented by Ishiguro
    # ToDo: consider go/no-go of the following block
    # (i) more reporting for val/evalutaion
    # (ii) best validation score snapshot
    # NOTE(review): `metric_name` below is the *last* key of the metrics dict
    # left over from the loop above — confirm that relying on iteration order
    # is intended here.
    if task_type == 'regression':
        if 'RMSE' in metric_name:
            trainer.extend(
                E.snapshot_object(model,
                                  "best_val_" + model_filename[task_type]),
                trigger=training.triggers.MinValueTrigger(
                    'validation/main/RMSE'))
        elif 'MAE' in metric_name:
            trainer.extend(
                E.snapshot_object(model,
                                  "best_val_" + model_filename[task_type]),
                trigger=training.triggers.MinValueTrigger(
                    'validation/main/MAE'))
        else:
            print("No validation metric defined?")
            assert(False)
    elif task_type == 'classification':
        train_eval_iter = iterators.SerialIterator(train, args.batchsize,
                                                   repeat=False, shuffle=False)
        trainer.extend(ROCAUCEvaluator(
            train_eval_iter, predictor, eval_func=predictor,
            device=args.gpu, converter=concat_mols, name='train',
            pos_labels=1, ignore_labels=-1, raise_value_error=False))
        # extension name='validation' is already used by `Evaluator`,
        # instead extension name `val` is used.
        trainer.extend(ROCAUCEvaluator(
            valid_iter, predictor, eval_func=predictor,
            device=args.gpu, converter=concat_mols, name='val',
            pos_labels=1, ignore_labels=-1))
        print_report_targets.append('train/main/roc_auc')
        print_report_targets.append('validation/main/loss')
        print_report_targets.append('val/main/roc_auc')
        trainer.extend(
            E.snapshot_object(model, "best_val_" + model_filename[task_type]),
            trigger=training.triggers.MaxValueTrigger('val/main/roc_auc'))
    else:
        raise NotImplementedError(
            'Not implemented task_type = {}'.format(task_type))

    trainer.extend(E.PrintReport(print_report_targets))
    trainer.extend(E.ProgressBar())
    trainer.run()

    # Save the model's parameters.
    model_path = os.path.join(model_dir, model_filename[task_type])
    print('Saving the trained model to {}...'.format(model_path))
    model.save_pickle(model_path, protocol=args.protocol)
def main():
    """Train a Mask R-CNN (ResNet backbone) on ARC2017 occlusion datasets.

    Supports optional multi-node training via ChainerMN; hyperparameters are
    scaled from the original Mask R-CNN 8-GPU schedule to the local batch
    size. Reporting/visualization extensions run only on rank 0.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        'dataset',
        choices=['visible+occlusion', 'synthetic', 'occlusion'],
        help='The dataset.',
    )
    parser.add_argument('--model', '-m',
                        choices=['vgg16', 'resnet50', 'resnet101'],
                        default='resnet50', help='Base model of Mask R-CNN.')
    parser.add_argument('--pooling-func', '-p',
                        choices=['pooling', 'align', 'resize'],
                        default='align', help='Pooling function.')
    parser.add_argument('--gpu', '-g', type=int, help='GPU id.')
    parser.add_argument('--multi-node', action='store_true',
                        help='use multi node')
    parser.add_argument('--mask-loss', default='softmax',
                        choices=synthetic2d.models.MaskRCNN.mask_losses,
                        help='mask loss mode')
    default_max_epoch = (180e3 * 8) / 118287 * 3  # x3
    parser.add_argument('--max-epoch', type=float,
                        default=default_max_epoch, help='epoch')
    args = parser.parse_args()

    if args.multi_node:
        import chainermn
        comm = chainermn.create_communicator('hierarchical')
        device = comm.intra_rank
        args.n_node = comm.inter_size
        args.n_gpu = comm.size
        chainer.cuda.get_device_from_id(device).use()
    else:
        args.n_node = 1
        args.n_gpu = 1
        chainer.cuda.get_device_from_id(args.gpu).use()
        device = args.gpu

    args.seed = 0
    now = datetime.datetime.now()
    args.timestamp = now.isoformat()

    # Only rank 0 chooses the log directory; it is broadcast to other ranks.
    if not args.multi_node or comm.rank == 0:
        out = osp.join(here, 'logs', now.strftime('%Y%m%d_%H%M%S.%f'))
    else:
        out = None
    if args.multi_node:
        args.out = comm.bcast_obj(out)
    else:
        args.out = out
    del out

    # 0.00125 * 8 = 0.01 in original
    args.batch_size = 1 * args.n_gpu
    args.lr = 0.00125 * args.batch_size
    args.weight_decay = 0.0001

    # lr / 10 at 120k iteration with
    # 160k iteration * 16 batchsize in original
    args.step_size = [(120e3 / 180e3) * args.max_epoch,
                      (160e3 / 180e3) * args.max_epoch]

    random.seed(args.seed)
    np.random.seed(args.seed)

    # Default Config
    # args.min_size = 800
    # args.max_size = 1333
    # args.anchor_scales = (2, 4, 8, 16, 32)
    args.min_size = 600
    args.max_size = 1000
    args.anchor_scales = (4, 8, 16, 32)
    args.rpn_dim = 512

    # -------------------------------------------------------------------------
    # Dataset
    if args.dataset == 'visible+occlusion':
        train_data1 = instance_occlsegm_lib.datasets.apc.\
            ARC2017InstanceSegmentationDataset('train', aug='standard')
        train_data1 = chainer.datasets.TransformDataset(
            train_data1, transform_visible_only_to_with_occlusion)
        train_data2 = instance_occlsegm_lib.datasets.apc.\
            ARC2017InstanceSegmentationDataset('test', aug='standard')
        train_data2 = chainer.datasets.TransformDataset(
            train_data2, transform_visible_only_to_with_occlusion)
        train_data3 = synthetic2d.datasets.ARC2017OcclusionDataset('train',
                                                                   do_aug=True)
        train_data = chainer.datasets.ConcatenatedDataset(
            train_data1,
            train_data2,
            train_data3,
        )
    elif args.dataset == 'synthetic':
        train_data = synthetic2d.datasets.ARC2017SyntheticInstancesDataset(
            do_aug=True, aug_level='all')
    elif args.dataset == 'occlusion':
        train_data = synthetic2d.datasets.ARC2017OcclusionDataset('train',
                                                                  do_aug=True)
    else:
        raise ValueError
    test_data = synthetic2d.datasets.ARC2017OcclusionDataset('test')
    fg_class_names = test_data.class_names
    test_data_list = test_data.get_video_datasets()
    del test_data

    # -------------------------------------------------------------------------
    # Model + Optimizer.
    if args.pooling_func == 'align':
        pooling_func = cmr.functions.roi_align_2d
    elif args.pooling_func == 'pooling':
        pooling_func = chainer.functions.roi_pooling_2d
    elif args.pooling_func == 'resize':
        pooling_func = cmr.functions.crop_and_resize
    else:
        raise ValueError

    assert args.model in ['resnet50', 'resnet101']
    n_layers = int(args.model.lstrip('resnet'))
    mask_rcnn = synthetic2d.models.MaskRCNNResNet(
        n_layers=n_layers,
        n_fg_class=len(fg_class_names),
        pooling_func=pooling_func,
        anchor_scales=args.anchor_scales,
        min_size=args.min_size,
        max_size=args.max_size,
        rpn_dim=args.rpn_dim,
        mask_loss=args.mask_loss,
    )
    mask_rcnn.nms_thresh = 0.3
    mask_rcnn.score_thresh = 0.05
    model = synthetic2d.models.MaskRCNNTrainChain(mask_rcnn)
    if args.multi_node or args.gpu >= 0:
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    if args.multi_node:
        optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=args.weight_decay))

    # Freeze the early backbone stages and all AffineChannel layers.
    mask_rcnn.extractor.conv1.disable_update()
    mask_rcnn.extractor.bn1.disable_update()
    mask_rcnn.extractor.res2.disable_update()
    for link in mask_rcnn.links():
        if isinstance(link, cmr.links.AffineChannel2D):
            link.disable_update()

    # -------------------------------------------------------------------------
    # Iterator.
    train_data = chainer.datasets.TransformDataset(
        train_data, cmr.datasets.MaskRCNNTransform(mask_rcnn))
    test_data_list = [
        chainer.datasets.TransformDataset(
            td, cmr.datasets.MaskRCNNTransform(mask_rcnn, train=False))
        for td in test_data_list
    ]
    test_concat_data = chainer.datasets.ConcatenatedDataset(*test_data_list)
    if args.multi_node:
        if comm.rank != 0:
            train_data = None
        train_data = chainermn.scatter_dataset(train_data, comm, shuffle=True)

    # for training
    train_iter = chainer.iterators.SerialIterator(train_data, batch_size=1)
    # for evaluation
    test_iters = {
        i: chainer.iterators.SerialIterator(
            td, batch_size=1, repeat=False, shuffle=False)
        for i, td in enumerate(test_data_list)
    }
    # for visualization
    test_concat_iter = chainer.iterators.SerialIterator(
        test_concat_data, batch_size=1, repeat=False, shuffle=False)

    # -------------------------------------------------------------------------

    converter = functools.partial(
        cmr.datasets.concat_examples,
        padding=0,
        # img, bboxes, labels, masks, scales
        indices_concat=[0, 2, 3, 4],  # img, _, labels, masks, scales
        indices_to_device=[0, 1],  # img, bbox
    )
    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, device=device,
        converter=converter)

    trainer = training.Trainer(
        updater, (args.max_epoch, 'epoch'), out=args.out)

    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=training.triggers.ManualScheduleTrigger(
                       args.step_size, 'epoch'))

    eval_interval = 1, 'epoch'
    log_interval = 20, 'iteration'
    plot_interval = 0.1, 'epoch'
    print_interval = 20, 'iteration'

    # NOTE(review): reporting/visualization extensions below are assumed to
    # run only on rank 0 in multi-node mode — confirm against the original
    # indentation.
    if not args.multi_node or comm.rank == 0:
        evaluator = synthetic2d.extensions.InstanceSegmentationVOCEvaluator(
            test_iters, model.mask_rcnn, device=device,
            use_07_metric=False, label_names=fg_class_names)
        trainer.extend(evaluator, trigger=eval_interval)
        trainer.extend(extensions.snapshot_object(model.mask_rcnn,
                                                  'snapshot_model.npz'),
                       trigger=training.triggers.MaxValueTrigger(
                           'validation/main/mpq', eval_interval))
        args.git_hash = cmr.utils.git_hash()
        args.hostname = socket.gethostname()
        trainer.extend(fcn.extensions.ParamsReport(args.__dict__))
        trainer.extend(synthetic2d.extensions.InstanceSegmentationVisReport(
            test_concat_iter, model.mask_rcnn,
            label_names=fg_class_names),
            trigger=eval_interval)
        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(
            extensions.PrintReport([
                'iteration', 'epoch', 'elapsed_time', 'lr', 'main/loss',
                'main/roi_loc_loss', 'main/roi_cls_loss',
                'main/roi_mask_loss', 'main/rpn_loc_loss',
                'main/rpn_cls_loss', 'validation/main/mpq'
            ]),
            trigger=print_interval,
        )
        trainer.extend(extensions.ProgressBar(update_interval=10))

        # plot
        assert extensions.PlotReport.available()
        trainer.extend(
            extensions.PlotReport(
                [
                    'main/loss', 'main/roi_loc_loss', 'main/roi_cls_loss',
                    'main/roi_mask_loss', 'main/rpn_loc_loss',
                    'main/rpn_cls_loss'
                ],
                file_name='loss.png', trigger=plot_interval,
            ),
            trigger=plot_interval,
        )
        trainer.extend(
            extensions.PlotReport([
                'validation/main/map', 'validation/main/msq',
                'validation/main/mdq', 'validation/main/mpq'
            ], file_name='accuracy.png', trigger=plot_interval),
            trigger=eval_interval,
        )

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()