def train(args, print_log=False):
    """Train a text classifier, optionally with a bi-LM predictive embedding.

    Args:
        args (namespace): Program arguments (dataset, model, bilm options,
            gpu id, output_dir, ...).
        print_log (bool): If True, print a detailed per-epoch report;
            otherwise only accuracy columns.

    Returns:
        float: The test accuracy recorded at the best validation epoch.
    """
    chainer.CHAINER_SEED = args.seed
    numpy.random.seed(args.seed)

    vocab = None
    # Load a dataset
    if args.dataset == 'dbpedia':
        train, test, vocab = text_datasets.get_dbpedia(vocab=vocab)
    elif args.dataset.startswith('imdb.'):
        train, test, vocab = text_datasets.get_imdb(
            fine_grained=args.dataset.endswith('.fine'), vocab=vocab)
    elif args.dataset in ['TREC', 'stsa.binary', 'stsa.fine',
                          'custrev', 'mpqa', 'rt-polarity', 'subj']:
        # NOTE: only this branch defines `real_test`; --validation is only
        # meaningful for these datasets.
        train, test, real_test, vocab = text_datasets.read_text_dataset(
            args.dataset, vocab=None, dir=args.data_dir)

    print('# train data: {}'.format(len(train)))
    print('# test data: {}'.format(len(test)))
    print('# vocab: {}'.format(len(vocab)))
    n_class = len({int(d[1]) for d in train})
    print('# class: {}'.format(n_class))

    # Re-seed so iterator shuffling is reproducible regardless of how much
    # randomness dataset loading consumed.
    chainer.CHAINER_SEED = args.seed
    numpy.random.seed(args.seed)
    train = UnkDropout(train, vocab['<unk>'], 0.01)
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Setup a model
    chainer.CHAINER_SEED = args.seed
    numpy.random.seed(args.seed)
    if args.model == 'rnn':
        Encoder = class_nets.RNNEncoder
    elif args.model == 'cnn':
        Encoder = class_nets.CNNEncoder
    elif args.model == 'bow':
        Encoder = class_nets.BOWMLPEncoder
    encoder = Encoder(n_layers=args.layer, n_vocab=len(vocab),
                      n_units=args.unit, dropout=args.dropout)
    model = class_nets.TextClassifier(encoder, n_class)

    if args.bilm:
        # Replace the plain embedding with a predictive embedding backed by
        # a pretrained bi-directional language model.
        bilm = bilm_nets.BiLanguageModel(
            len(vocab), args.bilm_unit, args.bilm_layer, args.bilm_dropout)
        n_labels = len({int(v[1]) for v in test})
        print('# labels =', n_labels)
        if not args.no_label:
            print('add label')
            bilm.add_label_condition_nets(n_labels, args.bilm_unit)
        else:
            print('not using label')
        chainer.serializers.load_npz(args.bilm, bilm)
        with model.encoder.init_scope():
            initialW = numpy.array(model.encoder.embed.W.data)
            del model.encoder.embed
            model.encoder.embed = bilm_nets.PredictiveEmbed(
                len(vocab), args.unit, bilm, args.dropout,
                initialW=initialW)
            model.encoder.use_predict_embed = True
            model.encoder.embed.setup(
                mode=args.bilm_mode,
                temp=args.bilm_temp,
                word_lower_bound=0.,
                gold_lower_bound=0.,
                gumbel=args.bilm_gumbel,
                residual=args.bilm_residual,
                wordwise=args.bilm_wordwise,
                add_original=args.bilm_add_original,
                augment_ratio=args.bilm_ratio,
                ignore_unk=vocab['<unk>'])

    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU
        model.xp.random.seed(args.seed)
    chainer.CHAINER_SEED = args.seed
    numpy.random.seed(args.seed)

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam(args.learning_rate)
    optimizer.setup(model)

    # Set up a trainer
    updater = training.StandardUpdater(
        train_iter, optimizer, converter=convert_seq, device=args.gpu)

    from triggers import FailMaxValueTrigger
    # Stop early when validation accuracy fails to reach a new maximum
    # `stop_epoch` times in a row.
    stop_trigger = FailMaxValueTrigger(
        key='validation/main/accuracy', trigger=(1, 'epoch'),
        n_times=args.stop_epoch, max_trigger=args.epoch)
    trainer = training.Trainer(updater, stop_trigger, out=args.output_dir)

    # Evaluate the model with the test dataset for each epoch
    # (this iterator plays the role of the VALIDATION set)
    trainer.extend(MicroEvaluator(
        test_iter, model, converter=convert_seq, device=args.gpu))
    if args.validation:
        real_test_iter = chainer.iterators.SerialIterator(
            real_test, args.batchsize, repeat=False, shuffle=False)
        eval_on_real_test = MicroEvaluator(
            real_test_iter, model, converter=convert_seq, device=args.gpu)
        eval_on_real_test.default_name = 'test'
        trainer.extend(eval_on_real_test)

    # Take a best snapshot
    record_trigger = training.triggers.MaxValueTrigger(
        'validation/main/accuracy', (1, 'epoch'))
    if args.save_model:
        trainer.extend(extensions.snapshot_object(model, 'best_model.npz'),
                       trigger=record_trigger)

    # Write a log of evaluation statistics for each epoch.
    # `out` collects printed values so the best-epoch accuracies can be
    # read back after training.
    out = Outer()
    trainer.extend(extensions.LogReport(
        filename=args.output_dir + '/classifier.log'))
    if print_log:
        trainer.extend(extensions.PrintReport(
            ['epoch', 'main/loss', 'validation/main/loss', 'test/main/loss',
             'main/accuracy', 'validation/main/accuracy',
             'test/main/accuracy'],
            out=out), trigger=record_trigger)
    else:
        trainer.extend(extensions.PrintReport(
            ['main/accuracy', 'validation/main/accuracy',
             'test/main/accuracy'],
            out=out), trigger=record_trigger)

    # Run the training
    trainer.run()

    if args.gpu >= 0:
        # Free all unused memory blocks "cached" in the memory pool.
        # ROBUSTNESS FIX: only touch cupy's pool when a GPU was used,
        # so CPU-only runs do not depend on cupy.
        mempool = cupy.get_default_memory_pool()
        mempool.free_all_blocks()

    # BUG FIX: the original passed the values as extra print() arguments
    # instead of calling str.format(), printing the raw template.
    print("val_acc:{}, test_acc:{}\n".format(out[-2], out[-1]))
    return float(out[-1])
def train(args): """Train with the given args. Args: args (namespace): The program arguments. """ set_deterministic_pytorch(args) # check cuda availability if not torch.cuda.is_available(): logging.warning("cuda is not available") # get input and output dimension info with open(args.valid_json, "rb") as f: valid_json = json.load(f)["utts"] utts = list(valid_json.keys()) idim = int(valid_json[utts[0]]["input"][0]["shape"][-1]) odim = int(valid_json[utts[0]]["output"][0]["shape"][-1]) logging.info("#input dims : " + str(idim)) logging.info("#output dims: " + str(odim)) # Initialize with pre-trained ASR encoder and MT decoder if args.enc_init is not None or args.dec_init is not None: model = load_trained_modules(idim, odim, args, interface=STInterface) else: model_class = dynamic_import(args.model_module) model = model_class(idim, odim, args) assert isinstance(model, STInterface) if args.rnnlm is not None: rnnlm_args = get_model_conf(args.rnnlm, args.rnnlm_conf) rnnlm = lm_pytorch.ClassifierWithState( lm_pytorch.RNNLM(len(args.char_list), rnnlm_args.layer, rnnlm_args.unit)) torch_load(args.rnnlm, rnnlm) model.rnnlm = rnnlm # write model config if not os.path.exists(args.outdir): os.makedirs(args.outdir) model_conf = args.outdir + "/model.json" with open(model_conf, "wb") as f: logging.info("writing a model config file to " + model_conf) f.write( json.dumps((idim, odim, vars(args)), indent=4, ensure_ascii=False, sort_keys=True).encode("utf_8")) for key in sorted(vars(args).keys()): logging.info("ARGS: " + key + ": " + str(vars(args)[key])) reporter = model.reporter # check the use of multi-gpu if args.ngpu > 1: if args.batch_size != 0: logging.warning( "batch size is automatically increased (%d -> %d)" % (args.batch_size, args.batch_size * args.ngpu)) args.batch_size *= args.ngpu # set torch device device = torch.device("cuda" if args.ngpu > 0 else "cpu") if args.train_dtype in ("float16", "float32", "float64"): dtype = getattr(torch, args.train_dtype) else: dtype = 
torch.float32 model = model.to(device=device, dtype=dtype) # Setup an optimizer if args.opt == "adadelta": optimizer = torch.optim.Adadelta(model.parameters(), rho=0.95, eps=args.eps, weight_decay=args.weight_decay) elif args.opt == "adam": optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) elif args.opt == "noam": from espnet.nets.pytorch_backend.transformer.optimizer import get_std_opt optimizer = get_std_opt( model.parameters(), args.adim, args.transformer_warmup_steps, args.transformer_lr, ) else: raise NotImplementedError("unknown optimizer: " + args.opt) # setup apex.amp if args.train_dtype in ("O0", "O1", "O2", "O3"): try: from apex import amp except ImportError as e: logging.error( f"You need to install apex for --train-dtype {args.train_dtype}. " "See https://github.com/NVIDIA/apex#linux") raise e if args.opt == "noam": model, optimizer.optimizer = amp.initialize( model, optimizer.optimizer, opt_level=args.train_dtype) else: model, optimizer = amp.initialize(model, optimizer, opt_level=args.train_dtype) use_apex = True else: use_apex = False # FIXME: TOO DIRTY HACK setattr(optimizer, "target", reporter) setattr(optimizer, "serialize", lambda s: reporter.serialize(s)) # Setup a converter converter = CustomConverter( subsampling_factor=model.subsample[0], dtype=dtype, use_source_text=args.asr_weight > 0 or args.mt_weight > 0, ) # read json data with open(args.train_json, "rb") as f: train_json = json.load(f)["utts"] with open(args.valid_json, "rb") as f: valid_json = json.load(f)["utts"] use_sortagrad = args.sortagrad == -1 or args.sortagrad > 0 # make minibatch list (variable length) train = make_batchset( train_json, args.batch_size, args.maxlen_in, args.maxlen_out, args.minibatches, min_batch_size=args.ngpu if args.ngpu > 1 else 1, shortest_first=use_sortagrad, count=args.batch_count, batch_bins=args.batch_bins, batch_frames_in=args.batch_frames_in, batch_frames_out=args.batch_frames_out, 
batch_frames_inout=args.batch_frames_inout, iaxis=0, oaxis=0, ) valid = make_batchset( valid_json, args.batch_size, args.maxlen_in, args.maxlen_out, args.minibatches, min_batch_size=args.ngpu if args.ngpu > 1 else 1, count=args.batch_count, batch_bins=args.batch_bins, batch_frames_in=args.batch_frames_in, batch_frames_out=args.batch_frames_out, batch_frames_inout=args.batch_frames_inout, iaxis=0, oaxis=0, ) load_tr = LoadInputsAndTargets( mode="asr", load_output=True, preprocess_conf=args.preprocess_conf, preprocess_args={"train": True}, # Switch the mode of preprocessing ) load_cv = LoadInputsAndTargets( mode="asr", load_output=True, preprocess_conf=args.preprocess_conf, preprocess_args={"train": False}, # Switch the mode of preprocessing ) # hack to make batchsize argument as 1 # actual bathsize is included in a list # default collate function converts numpy array to pytorch tensor # we used an empty collate function instead which returns list train_iter = ChainerDataLoader( dataset=TransformDataset(train, lambda data: converter([load_tr(data)])), batch_size=1, num_workers=args.n_iter_processes, shuffle=not use_sortagrad, collate_fn=lambda x: x[0], ) valid_iter = ChainerDataLoader( dataset=TransformDataset(valid, lambda data: converter([load_cv(data)])), batch_size=1, shuffle=False, collate_fn=lambda x: x[0], num_workers=args.n_iter_processes, ) # Set up a trainer updater = CustomUpdater( model, args.grad_clip, {"main": train_iter}, optimizer, device, args.ngpu, args.grad_noise, args.accum_grad, use_apex=use_apex, ) trainer = training.Trainer(updater, (args.epochs, "epoch"), out=args.outdir) if use_sortagrad: trainer.extend( ShufflingEnabler([train_iter]), trigger=(args.sortagrad if args.sortagrad != -1 else args.epochs, "epoch"), ) # Resume from a snapshot if args.resume: logging.info("resumed from %s" % args.resume) torch_resume(args.resume, trainer) # Evaluate the model with the test dataset for each epoch if args.save_interval_iters > 0: trainer.extend( 
CustomEvaluator(model, {"main": valid_iter}, reporter, device, args.ngpu), trigger=(args.save_interval_iters, "iteration"), ) else: trainer.extend( CustomEvaluator(model, {"main": valid_iter}, reporter, device, args.ngpu)) # Save attention weight at each epoch if args.num_save_attention > 0: data = sorted( list(valid_json.items())[:args.num_save_attention], key=lambda x: int(x[1]["input"][0]["shape"][1]), reverse=True, ) if hasattr(model, "module"): att_vis_fn = model.module.calculate_all_attentions plot_class = model.module.attention_plot_class else: att_vis_fn = model.calculate_all_attentions plot_class = model.attention_plot_class att_reporter = plot_class( att_vis_fn, data, args.outdir + "/att_ws", converter=converter, transform=load_cv, device=device, ) trainer.extend(att_reporter, trigger=(1, "epoch")) else: att_reporter = None # Save CTC prob at each epoch if (args.asr_weight > 0 and args.mtlalpha > 0) and args.num_save_ctc > 0: # NOTE: sort it by output lengths data = sorted( list(valid_json.items())[:args.num_save_ctc], key=lambda x: int(x[1]["output"][0]["shape"][0]), reverse=True, ) if hasattr(model, "module"): ctc_vis_fn = model.module.calculate_all_ctc_probs plot_class = model.module.ctc_plot_class else: ctc_vis_fn = model.calculate_all_ctc_probs plot_class = model.ctc_plot_class ctc_reporter = plot_class( ctc_vis_fn, data, args.outdir + "/ctc_prob", converter=converter, transform=load_cv, device=device, ikey="output", iaxis=1, ) trainer.extend(ctc_reporter, trigger=(1, "epoch")) else: ctc_reporter = None # Make a plot for training and validation values trainer.extend( extensions.PlotReport( [ "main/loss", "validation/main/loss", "main/loss_asr", "validation/main/loss_asr", "main/loss_mt", "validation/main/loss_mt", "main/loss_st", "validation/main/loss_st", ], "epoch", file_name="loss.png", )) trainer.extend( extensions.PlotReport( [ "main/acc", "validation/main/acc", "main/acc_asr", "validation/main/acc_asr", "main/acc_mt", "validation/main/acc_mt", 
], "epoch", file_name="acc.png", )) trainer.extend( extensions.PlotReport(["main/bleu", "validation/main/bleu"], "epoch", file_name="bleu.png")) # Save best models trainer.extend( snapshot_object(model, "model.loss.best"), trigger=training.triggers.MinValueTrigger("validation/main/loss"), ) trainer.extend( snapshot_object(model, "model.acc.best"), trigger=training.triggers.MaxValueTrigger("validation/main/acc"), ) # save snapshot which contains model and optimizer states if args.save_interval_iters > 0: trainer.extend( torch_snapshot(filename="snapshot.iter.{.updater.iteration}"), trigger=(args.save_interval_iters, "iteration"), ) else: trainer.extend(torch_snapshot(), trigger=(1, "epoch")) # epsilon decay in the optimizer if args.opt == "adadelta": if args.criterion == "acc": trainer.extend( restore_snapshot(model, args.outdir + "/model.acc.best", load_fn=torch_load), trigger=CompareValueTrigger( "validation/main/acc", lambda best_value, current_value: best_value > current_value, ), ) trainer.extend( adadelta_eps_decay(args.eps_decay), trigger=CompareValueTrigger( "validation/main/acc", lambda best_value, current_value: best_value > current_value, ), ) elif args.criterion == "loss": trainer.extend( restore_snapshot(model, args.outdir + "/model.loss.best", load_fn=torch_load), trigger=CompareValueTrigger( "validation/main/loss", lambda best_value, current_value: best_value < current_value, ), ) trainer.extend( adadelta_eps_decay(args.eps_decay), trigger=CompareValueTrigger( "validation/main/loss", lambda best_value, current_value: best_value < current_value, ), ) elif args.opt == "adam": if args.criterion == "acc": trainer.extend( restore_snapshot(model, args.outdir + "/model.acc.best", load_fn=torch_load), trigger=CompareValueTrigger( "validation/main/acc", lambda best_value, current_value: best_value > current_value, ), ) trainer.extend( adam_lr_decay(args.lr_decay), trigger=CompareValueTrigger( "validation/main/acc", lambda best_value, current_value: best_value 
> current_value, ), ) elif args.criterion == "loss": trainer.extend( restore_snapshot(model, args.outdir + "/model.loss.best", load_fn=torch_load), trigger=CompareValueTrigger( "validation/main/loss", lambda best_value, current_value: best_value < current_value, ), ) trainer.extend( adam_lr_decay(args.lr_decay), trigger=CompareValueTrigger( "validation/main/loss", lambda best_value, current_value: best_value < current_value, ), ) # Write a log of evaluation statistics for each epoch trainer.extend( extensions.LogReport(trigger=(args.report_interval_iters, "iteration"))) report_keys = [ "epoch", "iteration", "main/loss", "main/loss_st", "main/loss_asr", "validation/main/loss", "validation/main/loss_st", "validation/main/loss_asr", "main/acc", "validation/main/acc", ] if args.asr_weight > 0: report_keys.append("main/acc_asr") report_keys.append("validation/main/acc_asr") report_keys += ["elapsed_time"] if args.opt == "adadelta": trainer.extend( extensions.observe_value( "eps", lambda trainer: trainer.updater.get_optimizer("main"). param_groups[0]["eps"], ), trigger=(args.report_interval_iters, "iteration"), ) report_keys.append("eps") elif args.opt in ["adam", "noam"]: trainer.extend( extensions.observe_value( "lr", lambda trainer: trainer.updater.get_optimizer("main"). 
param_groups[0]["lr"], ), trigger=(args.report_interval_iters, "iteration"), ) report_keys.append("lr") if args.asr_weight > 0: if args.mtlalpha > 0: report_keys.append("main/cer_ctc") report_keys.append("validation/main/cer_ctc") if args.mtlalpha < 1: if args.report_cer: report_keys.append("validation/main/cer") if args.report_wer: report_keys.append("validation/main/wer") if args.report_bleu: report_keys.append("main/bleu") report_keys.append("validation/main/bleu") trainer.extend( extensions.PrintReport(report_keys), trigger=(args.report_interval_iters, "iteration"), ) trainer.extend( extensions.ProgressBar(update_interval=args.report_interval_iters)) set_early_stop(trainer, args) if args.tensorboard_dir is not None and args.tensorboard_dir != "": trainer.extend( TensorboardLogger( SummaryWriter(args.tensorboard_dir), att_reporter=att_reporter, ctc_reporter=ctc_reporter, ), trigger=(args.report_interval_iters, "iteration"), ) # Run the training trainer.run() check_early_stop(trainer, args.epochs)
def train(args):
    """Run training for the hybrid CTC/attention ASR model (PyTorch backend).

    Builds the E2E model and optimizer, wires Kaldi feature readers and
    Chainer-style iterators into a Trainer with snapshotting, plotting and
    eps-decay extensions, then runs the training loop.

    Args:
        args (namespace): The program arguments.
    """
    # seed setting
    torch.manual_seed(args.seed)

    # debug mode setting
    # 0 would be fastest, but 1 seems to be reasonable
    # by considering reproducibility
    # remove type check
    if args.debugmode < 2:
        chainer.config.type_check = False
        logging.info('torch type check is disabled')
    # use deterministic computation or not
    if args.debugmode < 1:
        torch.backends.cudnn.deterministic = False
        logging.info('torch cudnn deterministic is disabled')
    else:
        torch.backends.cudnn.deterministic = True

    # check cuda availability
    if not torch.cuda.is_available():
        logging.warning('cuda is not available')

    # get input and output dimension info
    with open(args.valid_label, 'rb') as f:
        valid_json = json.load(f)['utts']
    utts = list(valid_json.keys())
    idim = int(valid_json[utts[0]]['idim'])
    odim = int(valid_json[utts[0]]['odim'])
    logging.info('#input dims : ' + str(idim))
    logging.info('#output dims: ' + str(odim))

    # specify attention, CTC, hybrid mode
    if args.mtlalpha == 1.0:
        mtl_mode = 'ctc'
        logging.info('Pure CTC mode')
    elif args.mtlalpha == 0.0:
        mtl_mode = 'att'
        logging.info('Pure attention mode')
    else:
        mtl_mode = 'mtl'
        logging.info('Multitask learning mode')

    # specify model architecture
    e2e = E2E(idim, odim, args)
    model = Loss(e2e, args.mtlalpha)

    # write model config
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    model_conf = args.outdir + '/model.conf'
    with open(model_conf, 'wb') as f:
        # BUG FIX: the original message was missing the space after "to"
        logging.info('writing a model config file to ' + model_conf)
        # TODO(watanabe) use others than pickle, possibly json, and save as a text
        pickle.dump((idim, odim, args), f)
    for key in sorted(vars(args).keys()):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    # Set gpu
    reporter = model.reporter
    ngpu = args.ngpu
    if ngpu == 1:
        gpu_id = range(ngpu)
        logging.info('gpu id: ' + str(gpu_id))
        model.cuda()
    elif ngpu > 1:
        gpu_id = range(ngpu)
        logging.info('gpu id: ' + str(gpu_id))
        model = DataParallel(model, device_ids=gpu_id)
        model.cuda()
        # keep the per-GPU batch size constant under DataParallel
        logging.info('batch size is automatically increased (%d -> %d)' % (
            args.batch_size, args.batch_size * args.ngpu))
        args.batch_size *= args.ngpu
    else:
        gpu_id = [-1]

    # Setup an optimizer
    if args.opt == 'adadelta':
        optimizer = torch.optim.Adadelta(
            model.parameters(), rho=0.95, eps=args.eps)
    elif args.opt == 'adam':
        optimizer = torch.optim.Adam(model.parameters())
    else:
        # ROBUSTNESS FIX: an unrecognized optimizer previously fell through
        # and raised NameError on the first use of `optimizer`
        raise NotImplementedError('unknown optimizer: ' + args.opt)

    # FIXME: TOO DIRTY HACK — make the torch optimizer serializable by the
    # chainer Trainer via the reporter
    setattr(optimizer, "target", reporter)
    setattr(optimizer, "serialize", lambda s: reporter.serialize(s))

    # read json data
    with open(args.train_label, 'rb') as f:
        train_json = json.load(f)['utts']
    with open(args.valid_label, 'rb') as f:
        valid_json = json.load(f)['utts']

    # make minibatch list (variable length)
    train = make_batchset(train_json, args.batch_size,
                          args.maxlen_in, args.maxlen_out, args.minibatches)
    valid = make_batchset(valid_json, args.batch_size,
                          args.maxlen_in, args.maxlen_out, args.minibatches)
    # hack to make batchsize argument as 1
    # actual batchsize is included in a list
    train_iter = chainer.iterators.SerialIterator(train, 1)
    valid_iter = chainer.iterators.SerialIterator(
        valid, 1, repeat=False, shuffle=False)

    # prepare Kaldi reader
    train_reader = lazy_io.read_dict_scp(args.train_feat)
    valid_reader = lazy_io.read_dict_scp(args.valid_feat)

    # Set up a trainer
    updater = PytorchSeqUpdaterKaldi(
        model, args.grad_clip, train_iter, optimizer, train_reader, gpu_id)
    trainer = training.Trainer(
        updater, (args.epochs, 'epoch'), out=args.outdir)

    # Resume from a snapshot
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)
        if ngpu > 1:
            model.module.load_state_dict(
                torch.load(args.outdir + '/model.acc.best'))
        else:
            model.load_state_dict(
                torch.load(args.outdir + '/model.acc.best'))
        model = trainer.updater.model

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(PytorchSeqEvaluaterKaldi(
        model, valid_iter, reporter, valid_reader, device=gpu_id))

    # Take a snapshot for each specified epoch
    trainer.extend(extensions.snapshot(), trigger=(1, 'epoch'))

    # Make a plot for training and validation values
    trainer.extend(extensions.PlotReport(
        ['main/loss', 'validation/main/loss',
         'main/loss_ctc', 'validation/main/loss_ctc',
         'main/loss_att', 'validation/main/loss_att'],
        'epoch', file_name='loss.png'))
    trainer.extend(extensions.PlotReport(
        ['main/acc', 'validation/main/acc'],
        'epoch', file_name='acc.png'))

    # Save best models
    def torch_save(path, _):
        # save both the state_dict and the pickled module (".pkl")
        if ngpu > 1:
            torch.save(model.module.state_dict(), path)
            torch.save(model.module, path + ".pkl")
        else:
            torch.save(model.state_dict(), path)
            torch.save(model, path + ".pkl")

    trainer.extend(
        extensions.snapshot_object(model, 'model.loss.best',
                                   savefun=torch_save),
        trigger=training.triggers.MinValueTrigger('validation/main/loss'))
    # BUG FIX: `mtl_mode is not 'ctc'` compared identity of a string literal
    # (works only via CPython interning and emits a SyntaxWarning); use !=
    if mtl_mode != 'ctc':
        trainer.extend(
            extensions.snapshot_object(model, 'model.acc.best',
                                       savefun=torch_save),
            trigger=training.triggers.MaxValueTrigger('validation/main/acc'))

    # epsilon decay in the optimizer
    def torch_load(path, obj):
        # restore weights into the live model; `obj` (the snapshot target)
        # is returned unchanged
        if ngpu > 1:
            model.module.load_state_dict(torch.load(path))
        else:
            model.load_state_dict(torch.load(path))
        return obj

    if args.opt == 'adadelta':
        # same `is not` -> `!=` fix as above
        if args.criterion == 'acc' and mtl_mode != 'ctc':
            trainer.extend(
                restore_snapshot(model, args.outdir + '/model.acc.best',
                                 load_fn=torch_load),
                trigger=CompareValueTrigger(
                    'validation/main/acc',
                    lambda best_value, current_value:
                    best_value > current_value))
            trainer.extend(
                adadelta_eps_decay(args.eps_decay),
                trigger=CompareValueTrigger(
                    'validation/main/acc',
                    lambda best_value, current_value:
                    best_value > current_value))
        elif args.criterion == 'loss':
            trainer.extend(
                restore_snapshot(model, args.outdir + '/model.loss.best',
                                 load_fn=torch_load),
                trigger=CompareValueTrigger(
                    'validation/main/loss',
                    lambda best_value, current_value:
                    best_value < current_value))
            trainer.extend(
                adadelta_eps_decay(args.eps_decay),
                trigger=CompareValueTrigger(
                    'validation/main/loss',
                    lambda best_value, current_value:
                    best_value < current_value))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport(trigger=(100, 'iteration')))
    report_keys = [
        'epoch', 'iteration',
        'main/loss', 'main/loss_ctc', 'main/loss_att',
        'validation/main/loss', 'validation/main/loss_ctc',
        'validation/main/loss_att',
        'main/acc', 'validation/main/acc', 'elapsed_time'
    ]
    if args.opt == 'adadelta':
        trainer.extend(
            extensions.observe_value(
                'eps',
                lambda trainer:
                trainer.updater.get_optimizer('main').param_groups[0]["eps"]),
            trigger=(100, 'iteration'))
        report_keys.append('eps')
    trainer.extend(extensions.PrintReport(report_keys),
                   trigger=(100, 'iteration'))

    trainer.extend(extensions.ProgressBar())

    # Run the training
    trainer.run()
optimizer, device=gpu_id)
# NOTE(review): this span is a fragment — the call it closes above
# (presumably `updater = training.updaters.StandardUpdater(train_iter,
# optimizer, device=gpu_id)`) begins outside this span; confirm against
# the full script before editing.

########################################## Setup the Trainer
trainer = training.Trainer(updater, (MAX_EPOCH, 'epoch'), out='mnist_result')

########################################## Add extensions to the Trainer object
trainer.extend(extensions.LogReport())
# periodic full-trainer snapshot plus a per-epoch copy of the bare predictor
trainer.extend(extensions.snapshot(filename='snapshot_epoch-{.updater.epoch}'))
trainer.extend(
    extensions.snapshot_object(model.predictor,
                               filename='model_epoch-{.updater.epoch}'))
trainer.extend(extensions.Evaluator(test_iter, model, device=gpu_id))
trainer.extend(
    extensions.PrintReport([
        'epoch', 'main/loss', 'main/accuracy', 'validation/main/loss',
        'validation/main/accuracy', 'elapsed_time'
    ]))
trainer.extend(
    extensions.PlotReport(['main/loss', 'validation/main/loss'],
                          x_key='epoch',
                          file_name='loss.png'))
trainer.extend(
    extensions.PlotReport(['main/accuracy', 'validation/main/accuracy'],
                          x_key='epoch',
                          file_name='accuracy.png'))
trainer.extend(extensions.DumpGraph('main/loss'))

########################################## Start training
trainer.run()
def main():
    """Train FCIS (ResNet-101) on SBD instance segmentation with ChainerMN.

    Runs one process per GPU; rank 0 additionally evaluates on the
    validation split and writes logs/snapshots.
    """
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: FCIS')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--lr', '-l', type=float, default=None,
                        help='Learning rate for multi GPUs')
    parser.add_argument('--batchsize', type=int, default=8)
    parser.add_argument('--epoch', '-e', type=int, default=42)
    parser.add_argument('--cooldown-epoch', '-ce', type=int, default=28)
    args = parser.parse_args()

    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    # Force the 'forkserver' start method (and spawn one dummy process)
    # before CUDA is initialized, per the ChainerMN FAQ.
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    # chainermn: one communicator per process, GPU chosen by intra-node rank
    comm = chainermn.create_communicator('pure_nccl')
    device = comm.intra_rank

    np.random.seed(args.seed)

    # model
    fcis = FCISResNet101(
        n_fg_class=len(sbd_instance_segmentation_label_names),
        pretrained_model='imagenet', iter2=False)
    fcis.use_preset('evaluate')
    model = FCISTrainChain(fcis)

    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    # dataset: rank 0 builds the index permutation, then it is scattered
    # so every worker trains on a disjoint shard
    train_dataset = TransformDataset(
        SBDInstanceSegmentationDataset(split='train'),
        ('img', 'mask', 'label', 'bbox', 'scale'),
        Transform(model.fcis))
    if comm.rank == 0:
        indices = np.arange(len(train_dataset))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train_dataset = train_dataset.slice[indices]
    train_iter = chainer.iterators.SerialIterator(
        train_dataset, batch_size=args.batchsize // comm.size)

    if comm.rank == 0:
        test_dataset = SBDInstanceSegmentationDataset(split='val')
        test_iter = chainer.iterators.SerialIterator(
            test_dataset, batch_size=1, repeat=False, shuffle=False)

    # optimizer: gradients are all-reduced across workers
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9), comm)
    optimizer.setup(model)

    # 3x gradient scaling on the freshly initialized head conv, as in the
    # original FCIS training recipe
    model.fcis.head.conv1.W.update_rule.add_hook(GradientScaling(3.0))
    model.fcis.head.conv1.b.update_rule.add_hook(GradientScaling(3.0))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    # freeze batch-normalization parameters and the early extractor stages
    for param in model.params():
        if param.name in ['beta', 'gamma']:
            param.update_rule.enabled = False
    model.fcis.extractor.conv1.disable_update()
    model.fcis.extractor.res2.disable_update()

    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, converter=concat_examples, device=device)

    trainer = chainer.training.Trainer(
        updater, (args.epoch, 'epoch'), out=args.out)

    # step schedule: full LR until the cooldown epoch, then 10x smaller
    @make_shift('lr')
    def lr_scheduler(trainer):
        if args.lr is None:
            base_lr = 0.0005 * args.batchsize
        else:
            base_lr = args.lr

        epoch = trainer.updater.epoch
        if epoch < args.cooldown_epoch:
            rate = 1
        else:
            rate = 0.1
        return rate * base_lr

    trainer.extend(lr_scheduler)

    # reporting/evaluation extensions only on rank 0
    if comm.rank == 0:
        # interval
        log_interval = 100, 'iteration'
        plot_interval = 3000, 'iteration'
        print_interval = 20, 'iteration'

        # training extensions
        trainer.extend(
            extensions.snapshot_object(model.fcis,
                                       filename='snapshot_model.npz'),
            trigger=(args.epoch, 'epoch'))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        trainer.extend(extensions.PrintReport([
            'iteration', 'epoch', 'elapsed_time', 'lr',
            'main/loss',
            'main/rpn_loc_loss',
            'main/rpn_cls_loss',
            'main/roi_loc_loss',
            'main/roi_cls_loss',
            'main/roi_mask_loss',
            'validation/main/map',
        ]), trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        if extensions.PlotReport.available():
            trainer.extend(
                extensions.PlotReport(['main/loss'],
                                      file_name='loss.png',
                                      trigger=plot_interval),
                trigger=plot_interval)

        # NOTE(review): the trigger expresses epochs as
        # len(train_dataset) * epoch iterations, which assumes one sample
        # per iteration; with batchsize > 1 this fires later than the
        # named epochs — confirm intended behavior.
        trainer.extend(
            InstanceSegmentationVOCEvaluator(
                test_iter, model.fcis,
                iou_thresh=0.5, use_07_metric=True,
                label_names=sbd_instance_segmentation_label_names),
            trigger=ManualScheduleTrigger([
                len(train_dataset) * args.cooldown_epoch,
                len(train_dataset) * args.epoch
            ], 'iteration'))

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def main():
    """Train an SSD300 detector on a custom VoTT-exported VOC dataset.

    Trains with MomentumSGD and a manual LR step schedule, evaluates per
    epoch, snapshots every 10 epochs, and finally saves the trained model
    to 'my_ssd_model.npz'.
    """
    # Enable cuDNN autotune (with a larger workspace) for faster convolutions.
    chainer.cuda.set_max_workspace_size(512 * 1024 * 1024)
    chainer.config.autotune = True

    gpu_id = 0
    batchsize = 6
    out_num = 'results'
    log_interval = 1, 'epoch'
    epoch_max = 500
    initial_lr = 0.0001
    lr_decay_rate = 0.1
    lr_decay_timing = [200, 300, 400]  # epochs at which LR is multiplied by decay rate

    # Model setup
    model = SSD300(n_fg_class=len(voc_labels), pretrained_model='imagenet')
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)

    # GPU setup
    chainer.cuda.get_device_from_id(gpu_id).use()
    model.to_gpu()

    # Dataset setup.
    # BUG FIX: use raw strings for the Windows paths — '\P', '\T', '\c' are
    # invalid escape sequences (DeprecationWarning, future SyntaxError);
    # the byte values are unchanged.
    train_dataset = MyVoTTVOCDataset(
        r'C:\Python_Programs\chainer_practice\Telescope_corner', 'train')
    valid_dataset = MyVoTTVOCDataset(
        r'C:\Python_Programs\chainer_practice\Telescope_corner', 'val')

    # Data augmentation
    transformed_train_dataset = TransformDataset(
        train_dataset, Transform(model.coder, model.insize, model.mean))

    # Iterator setup
    train_iter = chainer.iterators.MultiprocessIterator(
        transformed_train_dataset, batchsize)
    valid_iter = chainer.iterators.SerialIterator(
        valid_dataset, batchsize, repeat=False, shuffle=False)

    # Optimizer setup: scale bias gradients x2, weight-decay everything else
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    # Updater setup
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=gpu_id)

    # Trainer setup
    trainer = training.Trainer(updater, (epoch_max, 'epoch'), out_num)

    # Step LR schedule
    trainer.extend(
        extensions.ExponentialShift('lr', lr_decay_rate, init=initial_lr),
        trigger=triggers.ManualScheduleTrigger(lr_decay_timing, 'epoch'))

    # Per-epoch mAP evaluation on the validation split
    trainer.extend(
        DetectionVOCEvaluator(valid_iter, model,
                              use_07_metric=False,
                              label_names=voc_labels),
        trigger=(1, 'epoch'))

    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'lr',
        'main/loss', 'main/loss/loc', 'main/loss/conf',
        'validation/main/map', 'elapsed_time'
    ]), trigger=log_interval)
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss', 'main/loss/loc', 'main/loss/conf'],
                'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(['validation/main/map'],
                                  'epoch', file_name='accuracy.png'))
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}.npz'),
        trigger=(10, 'epoch'))

    # To resume interrupted training, load a snapshot into the trainer:
    # serializers.load_npz('results/snapshot_epoch_100.npz', trainer)

    # Run the training
    trainer.run()

    # Save the trained model (on CPU so it loads anywhere)
    model.to_cpu()
    serializers.save_npz('my_ssd_model.npz', model)
def main():
    """Train a ThinStackRecursiveNet sentiment classifier on tree data.

    Reads train/test trees from ``trees/``, linearizes them into the
    device's array module, then runs a Chainer trainer with micro-averaged
    accuracy reporting.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--device', '-d', type=str, default='-1',
                        help='Device specifier. Either ChainerX device '
                        'specifier or an integer. If non-negative integer, '
                        'CuPy arrays with specified device id are used. If '
                        'negative integer, NumPy arrays are used')
    parser.add_argument('--epoch', '-e', default=400, type=int,
                        help='number of epochs to learn')
    parser.add_argument('--unit', '-u', default=30, type=int,
                        help='number of units')
    parser.add_argument('--batchsize', '-b', type=int, default=25,
                        help='learning minibatch size')
    parser.add_argument('--label', '-l', type=int, default=5,
                        help='number of labels')
    parser.add_argument('--epocheval', '-p', type=int, default=5,
                        help='number of epochs per evaluation')
    parser.add_argument('--test', dest='test', action='store_true')
    parser.set_defaults(test=False)
    # Deprecated --gpu flag is mapped onto the same dest as --device.
    group = parser.add_argument_group('deprecated arguments')
    group.add_argument('--gpu', '-g', dest='device',
                       type=int, nargs='?', const=0,
                       help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    # vocab is filled in as a side effect of linearize_tree below.
    vocab = {}
    max_size = None
    train_trees = data.read_corpus('trees/train.txt', max_size)
    test_trees = data.read_corpus('trees/test.txt', max_size)

    device = chainer.get_device(args.device)
    device.use()
    xp = device.xp

    # Linearize the trees into flat arrays on the target device.
    train_data = [linearize_tree(vocab, t, xp) for t in train_trees]
    train_iter = chainer.iterators.SerialIterator(train_data, args.batchsize)
    test_data = [linearize_tree(vocab, t, xp) for t in test_trees]
    test_iter = chainer.iterators.SerialIterator(test_data,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    model = ThinStackRecursiveNet(len(vocab), args.unit, args.label)
    model.to_device(device)

    optimizer = chainer.optimizers.AdaGrad(0.1)
    optimizer.setup(model)

    updater = training.StandardUpdater(train_iter,
                                       optimizer,
                                       converter=convert,
                                       device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'))
    # Evaluate every `epocheval` epochs on the test split.
    trainer.extend(extensions.Evaluator(test_iter,
                                        model,
                                        converter=convert,
                                        device=device),
                   trigger=(args.epocheval, 'epoch'))
    trainer.extend(extensions.LogReport())
    # Accuracy is reported as correct/total micro-averages.
    trainer.extend(
        extensions.MicroAverage('main/correct', 'main/total',
                                'main/accuracy'))
    trainer.extend(
        extensions.MicroAverage('validation/main/correct',
                                'validation/main/total',
                                'validation/main/accuracy'))
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.run()
def train(config):
    """Run (optionally conditional) TGAN2 training from a config dict.

    Uses ChainerMN for multi-node data-parallel training: rank 0 loads the
    dataset and scatters it, owns logging/snapshot/preview extensions, and
    all ranks share multi-node optimizers and a checkpointer keyed by a
    hash of the config.
    """
    config_backup = copy.deepcopy(config)

    # Setup
    device, comm = get_device_communicator(config['gpu'],
                                           config['communicator'],
                                           config['seed'],
                                           config['batchsize'])
    chainer.config.comm = comm  # To use from the inside of models

    if config.get('seed', None) is not None:
        random.seed(config['seed'])
        numpy.random.seed(config['seed'])
        cuda.cupy.random.seed(config['seed'])

    # Prepare dataset and models.
    # scatter_dataset is a collective call: every rank must reach it,
    # non-root ranks pass None.
    if not config['label']:
        if comm.mpi_comm.rank == 0:
            dataset = make_instance(tgan2, config['dataset'])
        else:
            dataset = None
        dataset = chainermn.scatter_dataset(dataset, comm, shuffle=True)
        # Retrieve property from the original of SubDataset
        n_channels = dataset._dataset.n_channels
        gen = make_instance(tgan2, config['gen'],
                            args={'out_channels': n_channels})
        dis = make_instance(tgan2, config['dis'],
                            args={'in_channels': n_channels})
    else:
        if comm.mpi_comm.rank == 0:
            print('## NOTE: Training Conditional TGAN')
            dataset = make_instance(tgan2, config['dataset'],
                                    args={'label': True})
        else:
            dataset = None
        dataset = chainermn.scatter_dataset(dataset, comm, shuffle=True)
        # Retrieve property from the original of SubDataset
        n_channels = dataset._dataset.n_channels
        n_classes = dataset._dataset.n_classes
        gen = make_instance(tgan2, config['gen'], args={
            'out_channels': n_channels,
            'n_classes': n_classes
        })
        dis = make_instance(tgan2, config['dis'], args={
            'in_channels': n_channels,
            'n_classes': n_classes
        })

    if device >= 0:
        chainer.cuda.get_device(device).use()
        gen.to_gpu()
        dis.to_gpu()

    if comm.mpi_comm.rank == 0:
        # Report parameter counts once (root rank only).
        def print_params(link):
            n_params = sum([p.size for n, p in link.namedparams()])
            print('# of params in {}:\t{}'.format(link.__class__.__name__,
                                                  n_params))
        print_params(gen)
        print_params(dis)

    # Prepare optimizers (wrapped for multi-node allreduce).
    gen_optimizer = chainermn.create_multi_node_optimizer(
        make_instance(chainer.optimizers, config['gen_opt']), comm)
    dis_optimizer = chainermn.create_multi_node_optimizer(
        make_instance(chainer.optimizers, config['dis_opt']), comm)
    gen_optimizer.setup(gen)
    dis_optimizer.setup(dis)
    optimizers = {
        'generator': gen_optimizer,
        'discriminator': dis_optimizer,
    }

    iterator = chainer.iterators.MultithreadIterator(
        dataset, batch_size=config['batchsize'])
    updater = make_instance(tgan2, config['updater'], args={
        'iterator': iterator,
        'optimizer': optimizers,
        'device': device
    })

    # Prepare trainer and its extensions.
    trainer = training.Trainer(updater, (config['iteration'], 'iteration'),
                               out=config['out'])
    snapshot_interval = (config['snapshot_interval'], 'iteration')
    display_interval = (config['display_interval'], 'iteration')

    # Reporting/snapshot extensions run on the root rank only.
    if comm.rank == 0:
        # Inception score
        if config.get('inception_score', None) is not None:
            conf_classifier = config['inception_score']['classifier']
            classifier = make_instance(tgan2, conf_classifier)
            if 'model_path' in conf_classifier:
                chainer.serializers.load_npz(conf_classifier['model_path'],
                                             classifier,
                                             path=conf_classifier['npz_path'])
            if device >= 0:
                classifier = classifier.to_gpu()
            is_conf = config['inception_score']
            is_args = {
                'batchsize': is_conf['batchsize'],
                'n_samples': is_conf['n_samples'],
                'splits': is_conf['splits'],
                'n_frames': is_conf['n_frames'],
            }
            trainer.extend(tgan2.make_inception_score_extension(
                gen, classifier, **is_args),
                trigger=(is_conf['interval'], 'iteration'))
        # Snapshot
        trainer.extend(extensions.snapshot_object(
            gen, 'generator_iter_{.updater.iteration}.npz'),
            trigger=snapshot_interval)
        # Do not save discriminator to save the space
        # trainer.extend(
        #     extensions.snapshot_object(
        #         dis, 'discriminator_iter_{.updater.iteration}.npz'),
        #     trigger=snapshot_interval)
        # Save movie
        if config.get('preview', None) is not None:
            preview_batchsize = config['preview']['batchsize']
            trainer.extend(tgan2.out_generated_movie(
                gen, dis,
                rows=config['preview']['rows'],
                cols=config['preview']['cols'],
                seed=0, dst=config['out'],
                batchsize=preview_batchsize),
                trigger=snapshot_interval)
        # Log
        trainer.extend(extensions.LogReport(trigger=display_interval))
        # NOTE(review): this appends into the config dict's own list
        # (mutates config['report_keys']); presumably harmless — confirm.
        report_keys = config['report_keys']
        if config.get('inception_score', None) is not None:
            report_keys.append('IS_mean')
        trainer.extend(extensions.PrintReport(report_keys),
                       trigger=display_interval)
        trainer.extend(
            extensions.ProgressBar(update_interval=display_interval[0]))

    # Linear decay of Adam alpha from its initial value down to 0.
    if ('linear_decay' in config) and (config['linear_decay']['start']
                                       is not None):
        if comm.rank == 0:
            print('Use linear decay: {}:{} -> {}:{}'.format(
                config['linear_decay']['start'], config['iteration'],
                config['gen_opt']['args']['alpha'], 0.))
        trainer.extend(
            extensions.LinearShift(
                'alpha', (config['gen_opt']['args']['alpha'], 0.),
                (config['linear_decay']['start'], config['iteration']),
                gen_optimizer))
        trainer.extend(
            extensions.LinearShift(
                'alpha', (config['dis_opt']['args']['alpha'], 0.),
                (config['linear_decay']['start'], config['iteration']),
                dis_optimizer))

    # Checkpointer: path keyed by a SHA-1 of the (deep-copied) config so
    # different configs never share checkpoints.
    config_hash = hashlib.sha1()
    config_hash.update(
        yaml.dump(config_backup, default_flow_style=False).encode('utf-8'))
    os.makedirs('snapshots', exist_ok=True)
    checkpointer = chainermn.create_multi_node_checkpointer(
        name='tgan2', comm=comm,
        path=f'snapshots/{config_hash.hexdigest()}')
    checkpointer.maybe_load(trainer, gen_optimizer)
    if trainer.updater.epoch > 0:
        print('Resuming from checkpoints: epoch =', trainer.updater.epoch)
    trainer.extend(checkpointer, trigger=snapshot_interval)

    # Copy config to result dir
    os.makedirs(config['out'], exist_ok=True)
    config_path = os.path.join(config['out'], 'config.yml')
    with open(config_path, 'w') as fp:
        fp.write(yaml.dump(config_backup, default_flow_style=False))

    # Run the training
    trainer.run()
def train(args):
    '''Run end-to-end ASR training (CTC / attention / hybrid) with Chainer.

    Reads train/valid JSON manifests, builds the E2E model and the loss
    wrapper, sets up single- or multi-GPU updaters, and runs the trainer
    with snapshotting, plotting and (for AdaDelta) epsilon-decay scheduling.
    '''
    # display chainer version
    logging.info('chainer version = ' + chainer.__version__)

    # seed setting (chainer seed may not need it)
    os.environ['CHAINER_SEED'] = str(args.seed)
    logging.info('chainer seed = ' + os.environ['CHAINER_SEED'])

    # debug mode setting
    # 0 would be fastest, but 1 seems to be reasonable
    # by considering reproducibility
    # remove type check
    if args.debugmode < 2:
        chainer.config.type_check = False
        logging.info('chainer type check is disabled')
    # use deterministic computation or not
    if args.debugmode < 1:
        chainer.config.cudnn_deterministic = False
        logging.info('chainer cudnn deterministic is disabled')
    else:
        chainer.config.cudnn_deterministic = True

    # check cuda and cudnn availability
    if not chainer.cuda.available:
        logging.warning('cuda is not available')
    if not chainer.cuda.cudnn_enabled:
        logging.warning('cudnn is not available')

    # get input and output dimension info from the first validation utterance
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']
    utts = list(valid_json.keys())
    idim = int(valid_json[utts[0]]['input'][0]['shape'][1])
    odim = int(valid_json[utts[0]]['output'][0]['shape'][1])
    logging.info('#input dims : ' + str(idim))
    logging.info('#output dims: ' + str(odim))

    # check attention type
    if args.atype not in ['noatt', 'dot', 'location']:
        raise NotImplementedError(
            'chainer supports only noatt, dot, and location attention.')

    # specify attention, CTC, hybrid mode
    if args.mtlalpha == 1.0:
        mtl_mode = 'ctc'
        logging.info('Pure CTC mode')
    elif args.mtlalpha == 0.0:
        mtl_mode = 'att'
        logging.info('Pure attention mode')
    else:
        mtl_mode = 'mtl'
        logging.info('Multitask learning mode')

    # specify model architecture
    e2e = E2E(idim, odim, args)
    model = Loss(e2e, args.mtlalpha)

    # write model config
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    model_conf = args.outdir + '/model.json'
    with open(model_conf, 'wb') as f:
        logging.info('writing a model config file to ' + model_conf)
        f.write(
            json.dumps((idim, odim, vars(args)),
                       indent=4,
                       sort_keys=True).encode('utf_8'))
    for key in sorted(vars(args).keys()):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    # Set gpu
    ngpu = args.ngpu
    if ngpu == 1:
        gpu_id = 0
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(gpu_id).use()
        model.to_gpu()  # Copy the model to the GPU
        logging.info('single gpu calculation.')
    elif ngpu > 1:
        gpu_id = 0
        devices = {'main': gpu_id}
        for gid in six.moves.xrange(1, ngpu):
            devices['sub_%d' % gid] = gid
        logging.info('multi gpu calculation (#gpus = %d).' % ngpu)
        logging.info('batch size is automatically increased (%d -> %d)' %
                     (args.batch_size, args.batch_size * args.ngpu))
    else:
        gpu_id = -1
        logging.info('cpu calculation')

    # Setup an optimizer
    if args.opt == 'adadelta':
        optimizer = chainer.optimizers.AdaDelta(eps=args.eps)
    elif args.opt == 'adam':
        optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.grad_clip))

    # read json data
    with open(args.train_json, 'rb') as f:
        train_json = json.load(f)['utts']
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']

    # set up training iterator and updater
    converter = CustomConverter(e2e.subsample[0])
    if ngpu <= 1:
        # make minibatch list (variable length)
        train = make_batchset(train_json, args.batch_size, args.maxlen_in,
                              args.maxlen_out, args.minibatches)
        # hack to make batchsize argument as 1
        # actual batchsize is included in a list
        if args.n_iter_processes > 0:
            train_iter = chainer.iterators.MultiprocessIterator(
                TransformDataset(train, converter.transform),
                batch_size=1,
                n_processes=args.n_iter_processes,
                n_prefetch=8,
                maxtasksperchild=20)
        else:
            train_iter = chainer.iterators.SerialIterator(TransformDataset(
                train, converter.transform),
                batch_size=1)
        # set up updater
        updater = CustomUpdater(train_iter,
                                optimizer,
                                converter=converter,
                                device=gpu_id)
    else:
        # set up minibatches: round-robin split of utterances across GPUs
        train_subsets = []
        for gid in six.moves.xrange(ngpu):
            # make subset
            train_json_subset = {
                k: v
                for i, (k, v) in enumerate(train_json.items())
                if i % ngpu == gid
            }
            # make minibatch list (variable length)
            train_subsets += [
                make_batchset(train_json_subset, args.batch_size,
                              args.maxlen_in, args.maxlen_out,
                              args.minibatches)
            ]
        # each subset must have same length for MultiprocessParallelUpdater,
        # so pad shorter subsets by repeating their leading batches
        maxlen = max([len(train_subset) for train_subset in train_subsets])
        for train_subset in train_subsets:
            if maxlen != len(train_subset):
                for i in six.moves.xrange(maxlen - len(train_subset)):
                    train_subset += [train_subset[i]]
        # hack to make batchsize argument as 1
        # actual batchsize is included in a list
        if args.n_iter_processes > 0:
            train_iters = [
                chainer.iterators.MultiprocessIterator(
                    TransformDataset(train_subsets[gid], converter.transform),
                    batch_size=1,
                    n_processes=args.n_iter_processes,
                    n_prefetch=8,
                    maxtasksperchild=20) for gid in six.moves.xrange(ngpu)
            ]
        else:
            train_iters = [
                chainer.iterators.SerialIterator(TransformDataset(
                    train_subsets[gid], converter.transform),
                    batch_size=1) for gid in six.moves.xrange(ngpu)
            ]
        # set up updater
        updater = CustomParallelUpdater(train_iters,
                                        optimizer,
                                        converter=converter,
                                        devices=devices)

    # Set up a trainer
    trainer = training.Trainer(updater, (args.epochs, 'epoch'),
                               out=args.outdir)

    # Resume from a snapshot
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # set up validation iterator
    valid = make_batchset(valid_json, args.batch_size, args.maxlen_in,
                          args.maxlen_out, args.minibatches)
    if args.n_iter_processes > 0:
        valid_iter = chainer.iterators.MultiprocessIterator(
            TransformDataset(valid, converter.transform),
            batch_size=1,
            repeat=False,
            shuffle=False,
            n_processes=args.n_iter_processes,
            n_prefetch=8,
            maxtasksperchild=20)
    else:
        valid_iter = chainer.iterators.SerialIterator(TransformDataset(
            valid, converter.transform),
            batch_size=1,
            repeat=False,
            shuffle=False)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(
        extensions.Evaluator(valid_iter,
                             model,
                             converter=converter,
                             device=gpu_id))

    # Save attention weight each epoch (only meaningful when the attention
    # branch is in use, i.e. mtlalpha != 1.0)
    if args.num_save_attention > 0 and args.mtlalpha != 1.0:
        data = sorted(list(valid_json.items())[:args.num_save_attention],
                      key=lambda x: int(x[1]['input'][0]['shape'][1]),
                      reverse=True)
        if hasattr(model, "module"):
            att_vis_fn = model.module.predictor.calculate_all_attentions
        else:
            att_vis_fn = model.predictor.calculate_all_attentions
        trainer.extend(PlotAttentionReport(att_vis_fn,
                                           data,
                                           args.outdir + "/att_ws",
                                           converter=converter,
                                           device=gpu_id),
                       trigger=(1, 'epoch'))

    # Take a snapshot for each specified epoch
    trainer.extend(
        extensions.snapshot(filename='snapshot.ep.{.updater.epoch}'),
        trigger=(1, 'epoch'))

    # Make a plot for training and validation values
    trainer.extend(
        extensions.PlotReport([
            'main/loss', 'validation/main/loss', 'main/loss_ctc',
            'validation/main/loss_ctc', 'main/loss_att',
            'validation/main/loss_att'
        ], 'epoch', file_name='loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/acc', 'validation/main/acc'],
                              'epoch',
                              file_name='acc.png'))

    # Save best models
    trainer.extend(
        extensions.snapshot_object(model, 'model.loss.best'),
        trigger=training.triggers.MinValueTrigger('validation/main/loss'))
    # FIX: compare strings with `!=` rather than `is not` — identity
    # comparison with a literal only works by CPython interning accident
    # and raises SyntaxWarning on modern Python.
    if mtl_mode != 'ctc':
        trainer.extend(
            extensions.snapshot_object(model, 'model.acc.best'),
            trigger=training.triggers.MaxValueTrigger('validation/main/acc'))

    # epsilon decay in the optimizer: when validation degrades, restore the
    # best snapshot and shrink AdaDelta's eps
    if args.opt == 'adadelta':
        if args.criterion == 'acc' and mtl_mode != 'ctc':
            trainer.extend(restore_snapshot(model,
                                            args.outdir + '/model.acc.best'),
                           trigger=CompareValueTrigger(
                               'validation/main/acc',
                               lambda best_value, current_value: best_value >
                               current_value))
            trainer.extend(adadelta_eps_decay(args.eps_decay),
                           trigger=CompareValueTrigger(
                               'validation/main/acc',
                               lambda best_value, current_value: best_value >
                               current_value))
        elif args.criterion == 'loss':
            trainer.extend(restore_snapshot(model,
                                            args.outdir + '/model.loss.best'),
                           trigger=CompareValueTrigger(
                               'validation/main/loss',
                               lambda best_value, current_value: best_value <
                               current_value))
            trainer.extend(adadelta_eps_decay(args.eps_decay),
                           trigger=CompareValueTrigger(
                               'validation/main/loss',
                               lambda best_value, current_value: best_value <
                               current_value))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport(trigger=(REPORT_INTERVAL,
                                                 'iteration')))
    report_keys = [
        'epoch', 'iteration', 'main/loss', 'main/loss_ctc', 'main/loss_att',
        'validation/main/loss', 'validation/main/loss_ctc',
        'validation/main/loss_att', 'main/acc', 'validation/main/acc',
        'elapsed_time'
    ]
    if args.opt == 'adadelta':
        trainer.extend(extensions.observe_value(
            'eps',
            lambda trainer: trainer.updater.get_optimizer('main').eps),
            trigger=(REPORT_INTERVAL, 'iteration'))
        report_keys.append('eps')
    trainer.extend(extensions.PrintReport(report_keys),
                   trigger=(REPORT_INTERVAL, 'iteration'))
    trainer.extend(extensions.ProgressBar(update_interval=REPORT_INTERVAL))

    # Run the training
    trainer.run()
def main(args):
    """Train or evaluate an image classifier, optionally fine-tuned.

    In training mode, runs the trainer and saves initial/final weights under
    a timestamped output directory; in ``--test`` mode, evaluates an existing
    model and writes a confusion matrix.  Returns the evaluator results dict
    with an added ``'outputdir'`` key.
    """
    # Initialize the model to train
    model = models.archs[args.arch]()
    if args.finetune and hasattr(model, 'finetuned_model_path'):
        utils.finetuning.load_param(model.finetuned_model_path, model,
                                    args.ignore)
        #model.finetune = True
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    # Output directory is timestamped per run; in test mode, reuse the
    # directory of the loaded model instead.
    nowt = datetime.datetime.today()
    outputdir = args.out + '/' + args.arch + '/' + nowt.strftime(
        "%Y%m%d-%H%M") + '_bs' + str(args.batchsize)
    if args.test and args.initmodel is not None:
        outputdir = os.path.dirname(args.initmodel)

    # Load the datasets and mean file
    mean = None
    if hasattr(model, 'mean_value'):
        mean = makeMeanImage(model.mean_value)
    else:
        mean = np.load(args.mean)
    assert mean is not None

    train = ppds.PreprocessedDataset(args.train, args.root, mean,
                                     model.insize)
    val = ppds.PreprocessedDataset(args.val, args.root, mean, model.insize,
                                   False)
    # These iterators load the images with subprocesses running in parallel
    # to the training/validation.
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, shuffle=False, n_processes=args.loaderjob)
    #val_iter = chainer.iterators.MultiprocessIterator(
    #    val, args.val_batchsize, repeat=False, shuffle=False,
    #    n_processes=args.loaderjob)
    val_iter = chainer.iterators.SerialIterator(val,
                                                args.val_batchsize,
                                                repeat=False,
                                                shuffle=False)

    # Set up an optimizer; lr/momentum are applied only when the chosen
    # optimizer exposes those hyperparameters.
    optimizer = optimizers[args.opt]()
    #if args.opt == 'momentumsgd':
    if hasattr(optimizer, 'lr'):
        optimizer.lr = args.baselr
    if hasattr(optimizer, 'momentum'):
        optimizer.momentum = args.momentum
    optimizer.setup(model)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), outputdir)

    # Shorter intervals in test mode for quick turnaround.
    #val_interval = (10 if args.test else int(len(train) / args.batchsize)), 'iteration'
    val_interval = (10, 'iteration') if args.test else (1, 'epoch')
    snapshot_interval = (10, 'iteration') if args.test else (4, 'epoch')
    log_interval = (10 if args.test else 200), 'iteration'

    # Copy the chain with shared parameters to flip 'train' flag only in test
    eval_model = model.copy()
    eval_model.train = False

    if not args.test:
        val_evaluator = extensions.Evaluator(val_iter,
                                             eval_model,
                                             device=args.gpu)
    else:
        val_evaluator = utils.EvaluatorPlus(val_iter,
                                            eval_model,
                                            device=args.gpu)
        if 'googlenet' in args.arch:
            val_evaluator.lastname = 'validation/main/loss3'
    trainer.extend(val_evaluator, trigger=val_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
        trigger=(500, 'iteration'))
    # Be careful to pass the interval directly to LogReport
    # (it determines when to emit log rather than when to read observations)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'epoch',
        'iteration',
        'main/loss',
        'validation/main/loss',
        'main/accuracy',
        'validation/main/accuracy',
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.opt == 'momentumsgd':
        trainer.extend(extensions.ExponentialShift('lr', args.gamma),
                       trigger=(1, 'epoch'))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # Save the untrained weights first so the run is reproducible.
    if not args.test:
        chainer.serializers.save_npz(outputdir + '/model0', model)
    trainer.run()
    chainer.serializers.save_npz(outputdir + '/model', model)
    with open(outputdir + '/args.txt', 'w') as o:
        print(args, file=o)

    # Final evaluation after training completes.
    results = val_evaluator(trainer)
    results['outputdir'] = outputdir

    if args.test:
        print(val_evaluator.confmat)
        categories = utils.io.load_categories(args.categories)
        confmat_csv_name = args.initmodel + '.csv'
        confmat_fig_name = args.initmodel + '.eps'
        utils.io.save_confmat_csv(confmat_csv_name, val_evaluator.confmat,
                                  categories)
        utils.io.save_confmat_fig(confmat_fig_name, val_evaluator.confmat,
                                  categories,
                                  mode="rate",
                                  saveFormat="eps")
    return results
def main():
    '''Train the AEFINet frame-interpolation model with a VGG16 content loss.

    Parses hyperparameters from the command line, trains on the General100
    sequence dataset, and writes logs, plots and snapshots to a
    per-configuration results directory.
    '''
    # Command-line arguments.
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.001,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=100,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--iter_parallel', '-p', action='store_true',
                        default=False, help='filter(kernel) sizes')
    parser.add_argument('--opt', '-o', type=str, choices=('adam', 'sgd'),
                        default='adam')
    args = parser.parse_args()

    # Print the training parameters.
    print("-=Learning Parameter=-")
    print("# Max Epochs: {}".format(args.epoch))
    print("# Batch Size: {}".format(args.batchsize))
    print("# Learning Rate: {}".format(args.learnrate))
    print("# Optimizer Method: {}".format(args.opt))
    print('# Train Dataet: General 100')
    if args.iter_parallel:
        print("# Data Iters that loads in Parallel")
    print("\n")

    # Save directory (one per optimizer configuration); also dump the
    # parsed arguments for reproducibility.
    outdir = path.join(
        ROOT_PATH,
        'results/FI/AEFINet/AEFINetConcat_ch4_fsize5_VGG_content_loss_opt_{}'.
        format(args.opt))
    if not path.exists(outdir):
        os.makedirs(outdir)
    with open(path.join(outdir, 'arg_param.txt'), 'w') as f:
        for k, v in args.__dict__.items():
            f.write('{}:{}\n'.format(k, v))

    # Load the dataset; the parallel variant streams from disk, the other
    # keeps everything in memory.
    print('# loading dataet(General100_train, General100_test) ...')
    if args.iter_parallel:
        train = SequenceDataset(dataset='train')
        test = SequenceDataset(dataset='test')
    else:
        train = SequenceDatasetOnMem(dataset='train')
        test = SequenceDatasetOnMem(dataset='test')

    # prepare model: VGG16 provides the content-loss features.
    vgg16 = N.VGG16()
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        vgg16.to_gpu()
    chainer.serializers.load_npz(path.join(ROOT_PATH, 'models/VGG16.npz'),
                                 vgg16)
    model = N.VGG16Evaluator(N.AEFINetConcat(ch=4, f_size=5), vgg16)
    if args.gpu >= 0:
        model.to_gpu()

    # setup optimizer
    if args.opt == 'adam':
        optimizer = chainer.optimizers.Adam(alpha=args.learnrate)
    elif args.opt == 'sgd':
        optimizer = chainer.optimizers.MomentumSGD(lr=args.learnrate,
                                                   momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001))

    # setup iterators
    if args.iter_parallel:
        train_iter = chainer.iterators.MultiprocessIterator(train,
                                                            args.batchsize,
                                                            n_processes=8)
        test_iter = chainer.iterators.MultiprocessIterator(test,
                                                           args.batchsize,
                                                           repeat=False,
                                                           shuffle=False,
                                                           n_processes=8)
    else:
        train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
        test_iter = chainer.iterators.SerialIterator(test,
                                                     args.batchsize,
                                                     repeat=False,
                                                     shuffle=False)

    # setup trainer
    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=outdir)
    # evaluate on test data every epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))
    # dump loss graph
    trainer.extend(extensions.dump_graph('main/loss'))
    # learning-rate decay schedule (per-optimizer hyperparameter name)
    if args.opt == 'sgd':
        trainer.extend(extensions.ExponentialShift("lr", 0.1),
                       trigger=(100, 'epoch'))
    if args.opt == 'adam':
        trainer.extend(extensions.ExponentialShift("alpha", 0.1),
                       trigger=(50, 'epoch'))
    # save snapshots every 10 epochs
    trainer.extend(extensions.snapshot(), trigger=(10, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_snapshot_{.updater.epoch}'),
        trigger=(10, 'epoch'))
    # log report
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.observe_lr(), trigger=(1, 'epoch'))
    # plot loss graph
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              'epoch',
                              file_name='loss.png'))
    # plot PSNR graph
    trainer.extend(
        extensions.PlotReport(['main/PSNR', 'validation/main/PSNR'],
                              'epoch',
                              file_name='PSNR.png'))
    # print info
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/loss_mse',
            'main/loss_cont', 'main/PSNR', 'validation/main/PSNR', 'lr',
            'elapsed_time'
        ]))
    # print progress bar
    trainer.extend(extensions.ProgressBar())

    trainer.run()
def main():
    """Train a VGG classifier on CIFAR-10 or CIFAR-100.

    Standard Chainer CIFAR example: MomentumSGD with weight decay, LR halved
    every 25 epochs, best model kept by validation loss.
    """
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.05,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')
    model = L.Classifier(models.VGG.VGG(class_labels))
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))

    # Reduce the learning rate by half every 25 epochs.
    trainer.extend(extensions.ExponentialShift('lr', 0.5),
                   trigger=(25, 'epoch'))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a trainer snapshot only once, at the final epoch
    # (the trigger fires every `args.epoch` epochs).
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))

    # Take a snapshot of best model
    trainer.extend(extensions.snapshot_object(model, 'model_best'),
                   trigger=MinValueTrigger('validation/main/loss'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Save loss and accuracy plot
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch', file_name='accuracy.png'))

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
def main():
    """CLI entry point: train the MDN-RNN world model on pre-encoded rollouts.

    Pipeline (as visible in this function):
      1. Parse args; locate the experiment's output / random-rollouts / vision dirs.
      2. Auto-detect the latest ``snapshot_iter_N`` file for resuming.
      3. Load the pre-trained CVAE vision model and (optionally) a saved MDN-RNN.
      4. Train with a truncated-BPTT updater, snapshotting and sampling images.
      5. Finally generate a "dream" rollout gif by feeding the model its own
         predictions, following actions from a randomly chosen real rollout.
    """
    parser = argparse.ArgumentParser(description='World Models ' + ID)
    parser.add_argument('--data_dir', '-d', default="data/wm",
                        help='The base data/output directory')
    parser.add_argument(
        '--game', default='CarRacing-v0',
        help='Game to use')  # https://gym.openai.com/envs/CarRacing-v0/
    parser.add_argument('--experiment_name', default='experiment_1',
                        help='To isolate its files from others')
    parser.add_argument(
        '--load_batch_size', default=100, type=int,
        help='Load rollouts in batches so as not to run out of memory')
    parser.add_argument(
        '--model', '-m', default='',
        help=
        'Initialize the model from given file, or "default" for one in data folder'
    )
    # NOTE(review): the adjacent string literals below concatenate to "Dont auto
    # resume ..." — the apostrophe was lost (should probably be 'Don\'t ...').
    parser.add_argument('--no_resume', action='store_true', help='Don'
                        't auto resume from the latest snapshot')
    parser.add_argument(
        '--resume_from', '-r', default='',
        help='Resume the optimization from a specific snapshot')
    parser.add_argument('--test', action='store_true',
                        help='Generate samples only')
    parser.add_argument('--gpu', '-g', default=0, type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--epoch', '-e', default=20, type=int,
                        help='number of epochs to learn')
    parser.add_argument('--snapshot_interval', '-s', default=200, type=int,
                        help='snapshot every x games')
    parser.add_argument('--z_dim', '-z', default=32, type=int,
                        help='dimension of encoded vector')
    parser.add_argument('--hidden_dim', default=256, type=int,
                        help='LSTM hidden units')
    parser.add_argument('--mixtures', default=5, type=int,
                        help='number of gaussian mixtures for MDN')
    # NOTE(review): this flag *disables* the progress bar (see usage below),
    # but its help text says "Display" — help string looks inverted.
    parser.add_argument('--no_progress_bar', '-p', action='store_true',
                        help='Display progress bar during training')
    parser.add_argument('--predict_done', action='store_true',
                        help='Whether MDN-RNN should also predict done state')
    parser.add_argument('--sample_temperature', default=1., type=float,
                        help='Temperature for generating samples')
    parser.add_argument('--gradient_clip', default=0., type=float,
                        help='Clip grads L2 norm threshold. 0 = no clip')
    parser.add_argument('--sequence_length', type=int, default=128,
                        help='sequence length for LSTM for TBPTT')

    args = parser.parse_args()
    log(ID, "args =\n " + str(vars(args)).replace(",", ",\n "))

    # Experiment directory layout: <data_dir>/<game>/<experiment>/<component>.
    output_dir = os.path.join(args.data_dir, args.game, args.experiment_name,
                              ID)
    mkdir(output_dir)
    random_rollouts_dir = os.path.join(args.data_dir, args.game,
                                       args.experiment_name,
                                       'random_rollouts')
    vision_dir = os.path.join(args.data_dir, args.game, args.experiment_name,
                              'vision')

    log(ID, "Starting")

    # Scan the output dir for the highest-numbered "snapshot_iter_N" file so
    # training can auto-resume from it (unless --no_resume / --resume_from).
    # NOTE(review): `file` and `iter` shadow Python builtins here.
    max_iter = 0
    auto_resume_file = None
    files = os.listdir(output_dir)
    for file in files:
        if re.match(r'^snapshot_iter_', file):
            iter = int(re.search(r'\d+', file).group())
            if (iter > max_iter):
                max_iter = iter
    if max_iter > 0:
        auto_resume_file = os.path.join(output_dir,
                                        "snapshot_iter_{}".format(max_iter))

    model = MDN_RNN(args.hidden_dim, args.z_dim, args.mixtures,
                    args.predict_done)
    # The CVAE is only used for decoding latents into images; its weights are
    # loaded from the separately-trained vision component.
    vision = CVAE(args.z_dim)
    chainer.serializers.load_npz(os.path.join(vision_dir, "vision.model"),
                                 vision)

    if args.model:
        if args.model == 'default':
            args.model = os.path.join(output_dir, ID + ".model")
        log(ID, "Loading saved model from: " + args.model)
        chainer.serializers.load_npz(args.model, model)

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    if args.gradient_clip > 0.:
        optimizer.add_hook(
            chainer.optimizer_hooks.GradientClipping(args.gradient_clip))

    log(ID, "Loading training data")
    train = ModelDataset(dir=random_rollouts_dir,
                         load_batch_size=args.load_batch_size,
                         verbose=False)
    # batch_size=1, shuffle=False: TBPTT consumes one rollout sequentially.
    train_iter = chainer.iterators.SerialIterator(train, batch_size=1,
                                                  shuffle=False)

    updater = TBPTTUpdater(train_iter, optimizer, args.gpu,
                           model.get_loss_func(), args.sequence_length)

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=output_dir)
    trainer.extend(extensions.snapshot(),
                   trigger=(args.snapshot_interval, 'iteration'))
    trainer.extend(
        extensions.LogReport(trigger=(10 if args.gpu >= 0 else 1,
                                      'iteration')))
    trainer.extend(
        extensions.PrintReport(['epoch', 'iteration', 'loss',
                                'elapsed_time']))
    if not args.no_progress_bar:
        trainer.extend(
            extensions.ProgressBar(update_interval=10 if args.gpu >= 0
                                   else 1))

    # Decode a fixed sample of the first rollout to images for visual sanity
    # checks; the same latents/actions seed the periodic ImageSampler below.
    # ModelDataset items unpack into (z_t, z_{t+1}, action, done) arrays.
    sample_size = 256
    rollout_z_t, rollout_z_t_plus_1, rollout_action, done = train[0]
    sample_z_t = rollout_z_t[0:sample_size]
    sample_z_t_plus_1 = rollout_z_t_plus_1[0:sample_size]
    sample_action = rollout_action[0:sample_size]
    img_t = vision.decode(sample_z_t).data
    img_t_plus_1 = vision.decode(sample_z_t_plus_1).data
    if args.predict_done:
        done = done.reshape(-1)
        img_t_plus_1[np.where(
            done[0:sample_size] >= 0.5), :, :, :] = 0  # Make done black
    save_images_collage(img_t, os.path.join(output_dir, 'train_t.png'))
    save_images_collage(img_t_plus_1,
                        os.path.join(output_dir, 'train_t_plus_1.png'))

    # model.copy() so sampling runs on a snapshot of weights, decoupled from
    # the live training state.
    image_sampler = ImageSampler(model.copy(), vision, args, output_dir,
                                 sample_z_t, sample_action)
    trainer.extend(image_sampler,
                   trigger=(args.snapshot_interval, 'iteration'))

    # Resume priority: explicit --resume_from beats the auto-detected snapshot.
    if args.resume_from:
        log(ID,
            "Resuming trainer manually from snapshot: " + args.resume_from)
        chainer.serializers.load_npz(args.resume_from, trainer)
    elif not args.no_resume and auto_resume_file is not None:
        log(ID,
            "Auto resuming trainer from last snapshot: " + auto_resume_file)
        chainer.serializers.load_npz(auto_resume_file, trainer)

    if not args.test:
        log(ID, "Starting training")
        trainer.run()
        log(ID, "Done training")
        log(ID, "Saving model")
        chainer.serializers.save_npz(os.path.join(output_dir, ID + ".model"),
                                     model)

    if args.test:
        log(ID, "Saving test samples")
        image_sampler(trainer)

    # "Dream" rollout: start from a (noised) real frame, then feed the model
    # its own predicted latents while replaying the real rollout's actions.
    log(ID, "Generating gif for a rollout generated in dream")
    if args.gpu >= 0:
        model.to_cpu()
    model.reset_state()
    # current_z_t = np.random.randn(64).astype(np.float32) # Noise as starting frame
    rollout_z_t, rollout_z_t_plus_1, rollout_action, done = train[
        np.random.randint(len(train))]  # Pick a random real rollout
    current_z_t = rollout_z_t[0]  # Starting frame from the real rollout
    current_z_t += np.random.normal(0, 0.5, current_z_t.shape).astype(
        np.float32)  # Add some noise to the real rollout starting frame
    all_z_t = [current_z_t]
    # current_action = np.asarray([0., 1.]).astype(np.float32)
    for i in range(rollout_z_t.shape[0]):
        # if i != 0 and i % 200 == 0: current_action = 1 - current_action # Flip actions every 100 frames
        current_action = np.expand_dims(
            rollout_action[i],
            0)  # follow actions performed in a real rollout
        output = model(current_z_t, current_action,
                       temperature=args.sample_temperature)
        if args.predict_done:
            current_z_t, done = output
            done = done.data
            # print(i, current_action, done)
        else:
            current_z_t = output
        all_z_t.append(current_z_t.data)
        # Stop dreaming once the model itself predicts the episode ended.
        if args.predict_done and done[0] >= 0.5:
            break
    dream_rollout_imgs = vision.decode(np.asarray(all_z_t).astype(
        np.float32)).data
    dream_rollout_imgs = post_process_image_tensor(dream_rollout_imgs)
    imageio.mimsave(os.path.join(output_dir, 'dream_rollout.gif'),
                    dream_rollout_imgs, fps=20)

    log(ID, "Done")
def main():
    """Train (and/or run prediction with) a pretrained-ResNet-based
    classifier/regressor on images listed in a CSV file.

    Behavior visible here:
      * regression vs classification is switched by --regress (loss/acc
        functions and target channel count differ);
      * pretrained ResNet layers get a reduced learning rate (Adam only);
      * after (optional) training, predictions for the validation set are
        written to <outdir>/result.txt, one comma-separated row per sample.
    """
    # command line argument parsing
    parser = argparse.ArgumentParser(
        description='Multi-Perceptron classifier/regressor')
    parser.add_argument('train', help='Path to csv file')
    parser.add_argument('--root', '-R', default="betti",
                        help='Path to image files')
    parser.add_argument('--val', help='Path to validation csv file',
                        required=True)
    parser.add_argument('--regress', '-r', action='store_true',
                        help='set for regression, otherwise classification')
    parser.add_argument('--time_series', '-ts', action='store_true',
                        help='set for time series data')
    parser.add_argument('--batchsize', '-b', type=int, default=10,
                        help='Number of samples in each mini-batch')
    parser.add_argument('--layer', '-l', type=str,
                        choices=['res5', 'pool5'], default='pool5',
                        help='output layer of the pretrained ResNet')
    parser.add_argument('--fch', type=int, nargs="*", default=[],
                        help='numbers of channels for the last fc layers')
    parser.add_argument('--cols', '-c', type=int, nargs="*", default=[1],
                        help='column indices in csv of target variables')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--snapshot', '-s', type=int, default=100,
                        help='snapshot interval')
    parser.add_argument('--initmodel', '-i',
                        help='Initialize the model from given file')
    parser.add_argument('--random', '-rt', type=int, default=1,
                        help='random translation')
    parser.add_argument('--gpu', '-g', type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--loaderjob', '-j', type=int, default=3,
                        help='Number of parallel data loading processes')
    parser.add_argument('--outdir', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--optimizer', '-op', choices=optim.keys(),
                        default='Adam', help='optimizer')
    parser.add_argument('--resume', type=str, default=None,
                        help='Resume the training from snapshot')
    parser.add_argument('--predict', '-p', action='store_true',
                        help='prediction with a specified model')
    parser.add_argument('--tuning_rate', '-tr', type=float, default=0.1,
                        help='learning rate for pretrained layers')
    parser.add_argument('--dropout', '-dr', type=float, default=0,
                        help='dropout ratio for the FC layers')
    parser.add_argument('--cw', '-cw', type=int, default=128,
                        help='crop image width')
    parser.add_argument('--ch', '-ch', type=int, default=128,
                        help='crop image height')
    parser.add_argument('--weight_decay', '-w', type=float, default=1e-6,
                        help='weight decay for regularization')
    parser.add_argument('--wd_norm', '-wn', choices=['none', 'l1', 'l2'],
                        default='l2',
                        help='norm of weight decay for regularization')
    parser.add_argument('--dtype', '-dt', choices=dtypes.keys(),
                        default='fp32', help='floating point precision')
    args = parser.parse_args()

    # Timestamped sub-directory keeps runs from overwriting each other.
    args.outdir = os.path.join(args.outdir, dt.now().strftime('%m%d_%H%M'))

    # Enable autotuner of cuDNN
    chainer.config.autotune = True
    chainer.config.dtype = dtypes[args.dtype]
    chainer.print_runtime_info()

    # read csv file
    train = Dataset(args.root, args.train, cw=args.cw, ch=args.ch,
                    random=args.random, regression=args.regress,
                    time_series=args.time_series, cols=args.cols)
    test = Dataset(args.root, args.val, cw=args.cw, ch=args.ch,
                   regression=args.regress, time_series=args.time_series,
                   cols=args.cols)

    ##
    # NOTE(review): `not args.gpu` is also true for an explicit `-g 0`
    # (falsy), not only for the unset default None — `args.gpu is None`
    # would be the precise test. With cuda available the result coincides.
    if not args.gpu:
        if chainer.cuda.available:
            args.gpu = 0
        else:
            args.gpu = -1

    print(args)
    save_args(args, args.outdir)

    # Regression: MSE loss, MAE "accuracy", one output channel per target
    # column. Classification: softmax CE + accuracy, channel count from data,
    # and only a single target column is supported.
    if args.regress:
        accfun = F.mean_absolute_error
        lossfun = F.mean_squared_error
        args.chs = len(args.cols)
    else:
        accfun = F.accuracy
        lossfun = F.softmax_cross_entropy
        args.chs = max(train.chs, test.chs)
        if len(args.cols) > 1:
            print("\n\nClassification only works with a single target.\n\n")
            exit()

    # Set up a neural network to train
    model = L.Classifier(Resnet(args), lossfun=lossfun, accfun=accfun)

    # Set up an optimizer
    optimizer = optim[args.optimizer]()
    optimizer.setup(model)
    if args.weight_decay > 0:
        if args.wd_norm == 'l2':
            optimizer.add_hook(chainer.optimizer.WeightDecay(
                args.weight_decay))
        elif args.wd_norm == 'l1':
            optimizer.add_hook(chainer.optimizer_hooks.Lasso(
                args.weight_decay))

    # slow update for pretrained layers
    # (scales Adam's per-parameter alpha for every link under the pretrained
    # base; only done for Adam since other optimizers use 'lr' instead)
    if args.optimizer in ['Adam']:
        for func_name in model.predictor.base._children:
            for param in model.predictor.base[func_name].params():
                param.update_rule.hyperparam.alpha *= args.tuning_rate

    if args.initmodel:
        print('Load model from: ', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    # select numpy or cupy
    xp = chainer.cuda.cupy if args.gpu >= 0 else np

    # train_iter = iterators.SerialIterator(train, args.batchsize, shuffle=True)
    # test_iter = iterators.SerialIterator(test, args.batchsize, repeat=False, shuffle=False)
    train_iter = iterators.MultithreadIterator(train, args.batchsize,
                                               shuffle=True,
                                               n_threads=args.loaderjob)
    test_iter = iterators.MultithreadIterator(test, args.batchsize,
                                              repeat=False, shuffle=False,
                                              n_threads=args.loaderjob)

    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'),
                               out=args.outdir)
    frequency = args.epoch if args.snapshot == -1 else max(1, args.snapshot)

    log_interval = 1, 'epoch'
    val_interval = 20, 'epoch'  # frequency/10, 'epoch'

    # trainer.extend(extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'), trigger=(frequency, 'epoch'))
    trainer.extend(extensions.snapshot_object(model,
                                              'model_epoch_{.updater.epoch}'),
                   trigger=(frequency, 'epoch'))
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu),
                   trigger=val_interval)

    # Halve the learning-rate-like hyperparameter 5 times over the run;
    # Adam exposes it as 'alpha', the others as 'lr'.
    if args.optimizer in ['Momentum', 'AdaGrad', 'RMSprop']:
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.ExponentialShift('lr', 0.5),
                       trigger=(args.epoch / 5, 'epoch'))
    elif args.optimizer in ['Adam']:
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.ExponentialShift("alpha", 0.5,
                                                   optimizer=optimizer),
                       trigger=(args.epoch / 5, 'epoch'))

    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch', file_name='accuracy.png'))

    trainer.extend(extensions.PrintReport([
        'epoch', 'main/loss', 'main/accuracy', 'validation/main/loss',
        'validation/main/accuracy', 'elapsed_time', 'lr'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # ChainerUI
    #trainer.extend(CommandsExtension())
    # NOTE(review): LogReport was already extended above — this second
    # registration looks like a leftover; confirm it is intentional.
    trainer.extend(extensions.LogReport(trigger=log_interval))

    if not args.predict:
        trainer.run()

    ## prediction
    # Writes one row per test sample: id, target(s), prediction(s); for
    # classification additionally the softmax probabilities (5 decimals).
    print("predicting: {} entries...".format(len(test)))
    test_iter = iterators.SerialIterator(test, args.batchsize, repeat=False,
                                         shuffle=False)
    converter = concat_examples
    idx = 0
    with open(os.path.join(args.outdir, 'result.txt'), 'w') as output:
        for batch in test_iter:
            x, t = converter(batch, device=args.gpu)
            with chainer.using_config('train', False):
                with chainer.function.no_backprop_mode():
                    if args.regress:
                        y = model.predictor(x).data
                        if args.gpu > -1:
                            y = xp.asnumpy(y)
                            t = xp.asnumpy(t)
                        # De-normalize back to the original target scale.
                        y = y * test.std + test.mean
                        t = t * test.std + test.mean
                    else:
                        y = F.softmax(model.predictor(x)).data
                        if args.gpu > -1:
                            y = xp.asnumpy(y)
                            t = xp.asnumpy(t)
            for i in range(y.shape[0]):
                output.write(os.path.basename(test.ids[idx]))
                if (len(t.shape) > 1):
                    # Multi-target regression: interleave target/prediction.
                    for j in range(t.shape[1]):
                        output.write(",{}".format(t[i, j]))
                        output.write(",{}".format(y[i, j]))
                else:
                    # Classification: true label, argmax, then probabilities.
                    output.write(",{}".format(t[i]))
                    output.write(",{}".format(np.argmax(y[i, :])))
                    for yy in y[i]:
                        output.write(",{0:1.5f}".format(yy))
                output.write("\n")
                idx += 1
def main():
    """Run the two-process pipelined-MLP MNIST example on ChainerMN.

    Worker 0 owns the input half of the network (wrapped in a Classifier)
    and the real dataset; worker 1 owns the output half and iterates an
    empty shadow dataset. Exactly two processes are required.
    """
    parser = argparse.ArgumentParser(
        description='ChainerMN example: pipelined neural network')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', action='store_true', help='Use GPU')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    args = parser.parse_args()

    # Communicator choice follows the hardware: 'hierarchical' with a GPU
    # per process (device = rank within the node), 'naive' on CPU.
    if args.gpu:
        comm = chainermn.create_communicator('hierarchical')
        dev = comm.intra_rank
    else:
        comm = chainermn.create_communicator('naive')
        dev = -1

    if comm.size != 2:
        raise ValueError(
            'This example can only be executed on exactly 2 processes.')

    # Only the first worker prints the run configuration.
    if comm.rank == 0:
        print('==========================================')
        if args.gpu:
            print('Using GPUs')
        print('Num unit: {}'.format(args.unit))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    # Each rank builds only its half of the pipelined model.
    if comm.rank == 0:
        net = L.Classifier(MLP0(comm, args.unit))
    elif comm.rank == 1:
        net = MLP1(comm, args.unit, 10)

    if dev >= 0:
        chainer.cuda.get_device_from_id(dev).use()
        net.to_gpu()

    opt = chainer.optimizers.Adam()
    opt.setup(net)

    # Rank 1 never reads real samples — it trains on an empty shadow dataset
    # driven by the activations it receives from rank 0.
    train, test = chainer.datasets.get_mnist()
    if comm.rank == 1:
        train = chainermn.datasets.create_empty_dataset(train)
        test = chainermn.datasets.create_empty_dataset(test)

    train_iter = chainer.iterators.SerialIterator(
        train, args.batchsize, shuffle=False)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    trainer = training.Trainer(
        training.StandardUpdater(train_iter, opt, device=dev),
        (args.epoch, 'epoch'), out=args.out)
    trainer.extend(extensions.Evaluator(test_iter, net, device=dev))

    # Display/output extensions are meaningful only on worker 0.
    if comm.rank == 0:
        trainer.extend(extensions.DumpGraph('main/loss'))
        trainer.extend(extensions.LogReport())
        trainer.extend(extensions.PrintReport(
            ['epoch', 'main/loss', 'validation/main/loss',
             'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))
        trainer.extend(extensions.ProgressBar())

    trainer.run()
def main():
    """Train a DCGAN-style generator/discriminator pair with a custom
    gradient-penalty Updater, periodically snapshotting models and
    sampling preview images.
    """
    parser = argparse.ArgumentParser(description='Train GAN')
    parser.add_argument('--batch_size', '-b', type=int, default=64)
    parser.add_argument('--max_iter', '-m', type=int, default=60000)
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--eval_interval', '-e', type=int, default=200,
                        help='Interval of evaluating generator')
    parser.add_argument("--learning_rate_g", type=float, default=0.0002,
                        help="Learning rate for generator")
    parser.add_argument("--learning_rate_d", type=float, default=0.0002,
                        help="Learning rate for discriminator")
    parser.add_argument('--gen_class', default='',
                        help='generator class')
    parser.add_argument('--dis_class', default='',
                        help='discriminator class')
    parser.add_argument("--load_gen_model", default='',
                        help='load generator model')
    parser.add_argument("--load_dis_model", default='',
                        help='load discriminator model')
    parser.add_argument("--lambda_gp", type=float, default=10,
                        help='gradient penalty')
    parser.add_argument("--image_size", type=int, default=64,
                        help='image size')
    parser.add_argument("--image_channels", type=int, default=3,
                        help='number of image channels')
    parser.add_argument("--latent_len", type=int, default=128,
                        help='latent vector length')
    parser.add_argument("--load_dataset", default='celeba_train',
                        help='load dataset')
    parser.add_argument("--dataset_path", "-d", default=settings.CELEBA_PATH,
                        help='dataset directory')

    args = parser.parse_args()
    print(args)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()

    # SECURITY NOTE(review): eval() on a CLI string executes arbitrary
    # Python. Acceptable only for a trusted local research script; a
    # lookup table of allowed classes would be safer.
    if args.gen_class != '':
        gen = eval(args.gen_class)
    else:
        gen = DCGANGenerator(latent=args.latent_len,
                             out_ch=args.image_channels)
    if args.dis_class != '':
        dis = eval(args.dis_class)
    else:
        dis = DCGANEncoder(in_ch=args.image_channels, use_bn=False,
                           out_len=256)

    # Optionally warm-start either network from a saved .npz.
    if args.load_gen_model != '':
        serializers.load_npz(args.load_gen_model, gen)
        print("Generator model loaded")
    if args.load_dis_model != '':
        serializers.load_npz(args.load_dis_model, dis)
        print("Discriminator model loaded")

    if args.gpu >= 0:
        gen.to_gpu()
        dis.to_gpu()
        print("use gpu {}".format(args.gpu))

    # beta1=0.5 is the usual DCGAN Adam setting.
    opt_g = make_adam(gen, lr=args.learning_rate_g, beta1=0.5)
    opt_d = make_adam(dis, lr=args.learning_rate_d, beta1=0.5)

    # Dataset class is looked up by name on the project's `datasets` module.
    train_dataset = getattr(datasets, args.load_dataset)(
        path=args.dataset_path)
    train_iter = chainer.iterators.MultiprocessIterator(
        train_dataset, args.batch_size, n_processes=4)

    updater = Updater(
        models=(gen, dis),
        iterator={
            'main': train_iter,
        },
        optimizer={
            'gen': opt_g,
            'dis': opt_d},
        device=args.gpu,
        params={
            'batch_size': args.batch_size,
            'img_size': args.image_size,
            'img_chan': args.image_channels,
            'lambda_gp': args.lambda_gp,
            'latent_len': args.latent_len,
        },
    )

    trainer = training.Trainer(updater, (args.max_iter, 'iteration'),
                               out=args.out)

    model_save_interval = (4000, 'iteration')
    eval_interval = (args.eval_interval, 'iteration')
    # Per-network snapshots named by iteration, e.g. gen_4000.npz.
    trainer.extend(extensions.snapshot_object(
        gen, 'gen_{.updater.iteration}.npz'), trigger=model_save_interval)
    trainer.extend(extensions.snapshot_object(
        dis, 'dis_{.updater.iteration}.npz'), trigger=model_save_interval)

    log_keys = ['epoch', 'iteration', 'gen/loss', 'dis/loss', 'dis/loss_gp']
    trainer.extend(extensions.LogReport(keys=log_keys,
                                        trigger=(20, 'iteration')))
    trainer.extend(extensions.PrintReport(log_keys), trigger=(20,
                                                              'iteration'))
    trainer.extend(extensions.ProgressBar(update_interval=50))

    # Periodically dump generated preview images.
    trainer.extend(
        gan_sampling(gen, args.out+"/preview/", args.gpu),
        trigger=eval_interval
    )

    trainer.run()
def _parse_args():
    # CLI options; defaults mirror the stock Chainer MNIST example.
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--model', '-m', default='MLP',
                        help='Choose the model: MLP or MLPSideEffect')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    parser.add_argument('--noplot', dest='plot', action='store_false',
                        help='Disable PlotReport extension')
    return parser.parse_args()


def main():
    """Train an MLP (or MLPSideEffect) classifier on MNIST with the
    standard Chainer trainer loop, reporting loss/accuracy each epoch.
    """
    args = _parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Classifier wraps the predictor and reports softmax cross entropy
    # loss plus accuracy every iteration (consumed by PrintReport below).
    if args.model == 'MLP':
        classifier = L.Classifier(MLP(args.unit, 10))
    elif args.model == 'MLPSideEffect':
        classifier = L.Classifier(MLPSideEffect(args.unit, 10))

    if args.gpu >= 0:
        # Make the requested GPU current and move the model onto it.
        chainer.cuda.get_device_from_id(args.gpu).use()
        classifier.to_gpu()

    opt = chainer.optimizers.Adam()
    opt.setup(classifier)

    train_set, test_set = chainer.datasets.get_mnist()
    train_it = chainer.iterators.SerialIterator(train_set, args.batchsize)
    test_it = chainer.iterators.SerialIterator(
        test_set, args.batchsize, repeat=False, shuffle=False)

    upd = training.updaters.StandardUpdater(train_it, opt, device=args.gpu)
    trainer = training.Trainer(upd, (args.epoch, 'epoch'), out=args.out)

    # Per-epoch evaluation on the held-out set.
    trainer.extend(extensions.Evaluator(test_it, classifier,
                                        device=args.gpu))
    # Dump the computation graph reachable from 'main/loss' once.
    trainer.extend(extensions.dump_graph('main/loss'))

    # -1 means "snapshot once, at the end"; otherwise clamp to >= 1 epoch.
    snap_every = args.epoch if args.frequency == -1 else max(1,
                                                             args.frequency)
    trainer.extend(extensions.snapshot(), trigger=(snap_every, 'epoch'))

    trainer.extend(extensions.LogReport())

    if args.plot and extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch', file_name='accuracy.png'))

    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Restore the full trainer state from a snapshot.
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Train a seq2seq translation model in Chainer.

    Loads vocabularies and parallel sentence data, filters training pairs
    by length, trains with Adam, and (optionally) evaluates on a validation
    set with periodic sample translations and BLEU.

    Bug fixed: the training-pair filter previously applied the *source*
    length bounds to the target sentence as well, so --min-target-sentence
    and --max-target-sentence were parsed but silently ignored. The filter
    now uses the target bounds for the target side (defaults are identical,
    so default behavior is unchanged).
    """
    parser = argparse.ArgumentParser(description='Chainer example: seq2seq')
    parser.add_argument('SOURCE', help='source sentence list')
    parser.add_argument('TARGET', help='target sentence list')
    parser.add_argument('SOURCE_VOCAB', help='source vocabulary file')
    parser.add_argument('TARGET_VOCAB', help='target vocabulary file')
    parser.add_argument('--validation-source',
                        help='source sentence list for validation')
    parser.add_argument('--validation-target',
                        help='target sentence list for validation')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='number of sentence pairs in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    # NOTE(review): --resume is parsed but never used below; wire it to
    # chainer.serializers.load_npz(args.resume, trainer) if resuming is
    # actually wanted.
    parser.add_argument('--resume', '-r', default='',
                        help='resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1024,
                        help='number of units')
    parser.add_argument('--layer', '-l', type=int, default=3,
                        help='number of layers')
    parser.add_argument('--min-source-sentence', type=int, default=1,
                        help='minimum length of source sentence')
    parser.add_argument('--max-source-sentence', type=int, default=50,
                        help='maximum length of source sentence')
    parser.add_argument('--min-target-sentence', type=int, default=1,
                        help='minimum length of target sentence')
    parser.add_argument('--max-target-sentence', type=int, default=50,
                        help='maximum length of target sentence')
    parser.add_argument('--log-interval', type=int, default=200,
                        help='number of iteration to show log')
    parser.add_argument('--validation-interval', type=int, default=4000,
                        help='number of iteration to evaluate the model '
                        'with validation dataset')
    parser.add_argument('--out', '-o', default='result',
                        help='directory to output the result')
    args = parser.parse_args()

    source_ids = load_vocabulary(args.SOURCE_VOCAB)
    target_ids = load_vocabulary(args.TARGET_VOCAB)
    train_source = load_data(source_ids, args.SOURCE)
    train_target = load_data(target_ids, args.TARGET)
    assert len(train_source) == len(train_target)

    # Keep only pairs whose source/target lengths fall inside their own
    # configured bounds (FIX: target side now uses the target bounds).
    train_data = [
        (s, t)
        for s, t in six.moves.zip(train_source, train_target)
        if (args.min_source_sentence <= len(s) <= args.max_source_sentence
            and
            args.min_target_sentence <= len(t) <= args.max_target_sentence)
    ]
    train_source_unknown = calculate_unknown_ratio(
        [s for s, _ in train_data])
    train_target_unknown = calculate_unknown_ratio(
        [t for _, t in train_data])

    print('Source vocabulary size: %d' % len(source_ids))
    print('Target vocabulary size: %d' % len(target_ids))
    print('Train data size: %d' % len(train_data))
    print('Train source unknown ratio: %.2f%%' % (train_source_unknown * 100))
    print('Train target unknown ratio: %.2f%%' % (train_target_unknown * 100))

    # Inverse mappings (id -> word) for printing sample translations.
    target_words = {i: w for w, i in target_ids.items()}
    source_words = {i: w for w, i in source_ids.items()}

    model = Seq2seq(args.layer, len(source_ids), len(target_ids), args.unit)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu(args.gpu)

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    train_iter = chainer.iterators.SerialIterator(train_data, args.batchsize)
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, converter=convert, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(
        extensions.LogReport(trigger=(args.log_interval, 'iteration')))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'main/loss', 'validation/main/loss',
         'main/perp', 'validation/main/perp', 'validation/main/bleu',
         'elapsed_time']),
        trigger=(args.log_interval, 'iteration'))

    if args.validation_source and args.validation_target:
        test_source = load_data(source_ids, args.validation_source)
        test_target = load_data(target_ids, args.validation_target)
        assert len(test_source) == len(test_target)
        test_data = list(six.moves.zip(test_source, test_target))
        # Validation only drops empty sentences; no upper length bound.
        test_data = [(s, t) for s, t in test_data
                     if 0 < len(s) and 0 < len(t)]
        test_source_unknown = calculate_unknown_ratio(
            [s for s, _ in test_data])
        test_target_unknown = calculate_unknown_ratio(
            [t for _, t in test_data])

        print('Validation data: %d' % len(test_data))
        print('Validation source unknown ratio: %.2f%%' %
              (test_source_unknown * 100))
        print('Validation target unknown ratio: %.2f%%' %
              (test_target_unknown * 100))

        @chainer.training.make_extension()
        def translate(trainer):
            # Print one random validation translation for eyeballing.
            source, target = test_data[numpy.random.choice(len(test_data))]
            result = model.translate([model.xp.array(source)])[0]
            source_sentence = ' '.join([source_words[x] for x in source])
            target_sentence = ' '.join([target_words[y] for y in target])
            result_sentence = ' '.join([target_words[y] for y in result])
            print('# source : ' + source_sentence)
            print('# result : ' + result_sentence)
            print('# expect : ' + target_sentence)

        trainer.extend(
            translate, trigger=(args.validation_interval, 'iteration'))
        trainer.extend(
            CalculateBleu(model, test_data, 'validation/main/bleu',
                          device=args.gpu),
            trigger=(args.validation_interval, 'iteration'))

    print('start training')
    trainer.run()
def train(mode):
    """Train (mode truthy) or resume-then-train the multi-modal GEINet.

    Loads the three GEI dataset splits, drives them through a multi-modal
    updater on GPU 0, and saves both the trainer state and the bare model
    when the run finishes.

    Bug fixed: the model-snapshot filename used '{.update.epoch}', which
    raised AttributeError at the first snapshot trigger — Trainer exposes
    the updater as '.updater'. The filename typo 'shapshot' is fixed at the
    same time (no snapshot ever got written under the old name, so no
    existing files break).

    Args:
        mode: truthy to train from scratch; falsy to first restore the
            trainer from the saved snapshot, then continue training.
    """
    Dt1_train_dir = "/media/wutong/New Volume/reseach/Dataset/OU-ISIR_by_Setoguchi/Gallery/signed/128_3ch/CV01_(Gallery&Probe)_2nd"
    train1 = load_GEI(path_dir=Dt1_train_dir, mode=True)
    Dt2_train_dir = "/media/wutong/New Volume/reseach/Dataset/OU-ISIR_by_Setoguchi/Gallery/signed/128_3ch/CV01_Dt2_(Gallery&Probe)"
    train2 = load_GEI(path_dir=Dt2_train_dir, mode=True)
    Dt3_train_dir = "/media/wutong/New Volume/reseach/Dataset/OU-ISIR_by_Setoguchi/Gallery/signed/128_3ch/CV01_Dt3_(Gallery&Probe)"
    train3 = load_GEI(path_dir=Dt3_train_dir, mode=True)

    model = Multi_modal_GEINet()
    model.to_gpu()

    # One iterator per modality; batch_size 239 with shuffle=False keeps the
    # three streams aligned sample-for-sample.
    Dt1_train_iter = iterators.SerialIterator(train1, batch_size=239,
                                              shuffle=False)
    Dt2_train_iter = iterators.SerialIterator(train2, batch_size=239,
                                              shuffle=False)
    Dt3_train_iter = iterators.SerialIterator(train3, batch_size=239,
                                              shuffle=False)

    optimizer = chainer.optimizers.MomentumSGD(lr=0.02, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.01))

    updater = Multi_modal_Updater(model, Dt1_train_iter, Dt2_train_iter,
                                  Dt3_train_iter, optimizer, device=0)

    epoch = 6250
    trainer = training.Trainer(
        updater, (epoch, 'epoch'),
        out='/home/wutong/Setoguchi/chainer_files/result')

    # Decay lr by ~0.56 every 1250 epochs (5 steps over the full run).
    trainer.extend(extensions.ExponentialShift(attr='lr', rate=0.56234),
                   trigger=(1250, 'epoch'))
    trainer.extend(
        extensions.LogReport(log_name='SFDEI_log', trigger=(20, "epoch")))
    # FIX: was '{.update.epoch}' (AttributeError — Trainer has '.updater').
    trainer.extend(extensions.snapshot_object(
        model, filename='model_snapshot_{.updater.epoch}'),
        trigger=(1250, 'epoch'))
    trainer.extend(extensions.snapshot(), trigger=(1250, 'epoch'))
    trainer.extend(extensions.PrintReport(['epoch', 'accuracy', 'loss']))
    trainer.extend(
        extensions.dump_graph(root_name="loss",
                              out_name="multi_modal_3.dot"))
    trainer.extend(extensions.PlotReport(["loss"]), trigger=(50, 'epoch'))
    trainer.extend(extensions.ProgressBar())

    if mode:
        # Run the trainer from scratch.
        trainer.run()
    else:
        # Restore the whole trainer state, then continue training.
        serializers.load_npz(
            "/home/wutong/Setoguchi/chainer_files/SFDEINet_multi_modal/SFDEINet_multi_modal_model",
            trainer)
        trainer.run()

    # NOTE(review): both saves target the same path, so the model overwrites
    # the trainer snapshot — presumably one of them should use a different
    # filename; kept as-is to preserve existing behavior.
    serializers.save_npz(
        "/home/wutong/Setoguchi/chainer_files/SFDEINet_multi_modal/SFDEINet_multi_modal_model",
        trainer)
    serializers.save_npz(
        "/home/wutong/Setoguchi/chainer_files/SFDEINet_multi_modal/SFDEINet_multi_modal_model",
        model)
def main():
    """Train a 128x128 DCGAN (SN discriminator) from a CLI entry point.

    Parses command-line arguments, builds the dataset/iterators, the
    generator/discriminator pair and their Adam optimizers, then runs a
    Chainer trainer with snapshot, logging and sample-image extensions.
    """
    parser = argparse.ArgumentParser(description='Train script')
    parser.add_argument('--batchsize', type=int, default=64)
    parser.add_argument('--max_iter', type=int, default=100000)
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--snapshot_interval', type=int, default=10000,
                        help='Interval of snapshot')
    parser.add_argument('--evaluation_interval', type=int, default=10000,
                        help='Interval of evaluation')
    parser.add_argument('--display_interval', type=int, default=100,
                        help='Interval of displaying log to console')
    parser.add_argument('--n_dis', type=int, default=1,
                        help='number of discriminator update per generator update')  # 5
    parser.add_argument('--gamma', type=float, default=0.5,
                        help='hyperparameter gamma')
    parser.add_argument('--lam', type=float, default=10,
                        help='gradient penalty')
    parser.add_argument('--adam_alpha', type=float, default=0.0002,
                        help='alpha in Adam optimizer')
    parser.add_argument('--adam_beta1', type=float, default=0.5,
                        help='beta1 in Adam optimizer')  # 0.0
    parser.add_argument('--adam_beta2', type=float, default=0.9,
                        help='beta2 in Adam optimizer')  # 0.9
    parser.add_argument('--output_dim', type=int, default=256,
                        help='output dimension of the discriminator (for cramer GAN)')
    parser.add_argument('--data-dir', type=str, default="")
    parser.add_argument('--image-npz', type=str, default="")
    parser.add_argument('--n-hidden', type=int, default=128)
    parser.add_argument('--resume', type=str, default="")
    parser.add_argument('--ch', type=int, default=512)
    parser.add_argument('--snapshot-iter', type=int, default=0)
    args = parser.parse_args()
    record_setting(args.out)
    report_keys = ["loss_dis", "loss_gen"]

    # Set up dataset: an NPZ archive takes precedence over a raw image dir.
    if args.image_npz != '':
        from c128dcgan.dataset import NPZColorDataset
        train_dataset = NPZColorDataset(npz=args.image_npz)
    elif args.data_dir != '':
        from c128dcgan.dataset import Color128x128Dataset
        train_dataset = Color128x128Dataset(args.data_dir)
    else:
        # BUG FIX: previously fell through with train_dataset unbound and
        # crashed later with a confusing NameError.
        raise ValueError('either --image-npz or --data-dir must be specified')
    train_iter = chainer.iterators.SerialIterator(train_dataset, args.batchsize)

    # Setup algorithm specific networks and updaters
    models = []
    opts = {}
    updater_args = {"iterator": {'main': train_iter}, "device": args.gpu}

    # fixed algorithm
    # from c128gan import Updater
    generator = common.net.C128Generator(ch=args.ch, n_hidden=args.n_hidden)
    discriminator = common.net.SND128Discriminator(ch=args.ch)
    models = [generator, discriminator]
    from dcgan.updater import Updater

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        print("use gpu {}".format(args.gpu))
        for m in models:
            m.to_gpu()

    # Set up optimizers (one per network, shared Adam hyper-parameters).
    opts["opt_gen"] = make_optimizer(
        generator, args.adam_alpha, args.adam_beta1, args.adam_beta2)
    opts["opt_dis"] = make_optimizer(
        discriminator, args.adam_alpha, args.adam_beta1, args.adam_beta2)
    updater_args["optimizer"] = opts
    updater_args["models"] = models

    # Set up updater and trainer
    updater = Updater(**updater_args)
    trainer = training.Trainer(updater, (args.max_iter, 'iteration'), out=args.out)

    # Set up logging: per-model snapshots, console report, sample images.
    for m in models:
        trainer.extend(extensions.snapshot_object(
            m, m.__class__.__name__ + '_{.updater.iteration}.npz'),
            trigger=(args.snapshot_interval, 'iteration'))
    trainer.extend(extensions.LogReport(
        keys=report_keys, trigger=(args.display_interval, 'iteration')))
    trainer.extend(extensions.PrintReport(report_keys),
                   trigger=(args.display_interval, 'iteration'))
    trainer.extend(sample_generate(generator, args.out),
                   trigger=(args.evaluation_interval, 'iteration'),
                   priority=extension.PRIORITY_WRITER)
    trainer.extend(sample_generate_light(generator, args.out),
                   trigger=(args.evaluation_interval // 10, 'iteration'),
                   priority=extension.PRIORITY_WRITER)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # Full-trainer snapshots: default to 100 evenly spaced snapshots.
    if args.snapshot_iter == 0:
        snap_iter = args.max_iter // 100
    else:
        snap_iter = args.snapshot_iter
    trainer.extend(extensions.snapshot(), trigger=(snap_iter, 'iteration'))

    # resume
    if args.resume != "":
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
if __name__ == '__main__':
    # Train MyModel on the "mouth" dataset, caching the train/test split
    # to dataset.pickle so subsequent runs skip dataset construction.
    model = L.Classifier(MyModel())
    if os.path.isfile('./dataset.pickle'):
        print("dataset.pickle is exist. loading...")
        with open('./dataset.pickle', mode='rb') as f:
            train, test = pickle.load(f)
        print("Loaded")
    else:
        datasets = dataset.Dataset("mouth")
        train, test = datasets.get_dataset()
        with open('./dataset.pickle', mode='wb') as f:
            pickle.dump((train, test), f)
        print("saving train and test...")

    optimizer = optimizers.MomentumSGD(lr=0.001, momentum=0.9)
    optimizer.setup(model)

    train_iter = iterators.SerialIterator(train, 64)
    # NOTE(review): shuffling an evaluation iterator has no effect on the
    # reported metrics; kept as-is to preserve behavior.
    test_iter = iterators.SerialIterator(test, 64, repeat=False, shuffle=True)

    updater = training.StandardUpdater(train_iter, optimizer, device=-1)
    # BUG FIX: MyModel.__class__ is `type` (the metaclass), so the output
    # directory was always 'type_model_result'; use the class's own name.
    trainer = training.Trainer(
        updater, (800, 'epoch'),
        out='{}_model_result'.format(MyModel.__name__))

    trainer.extend(extensions.dump_graph("main/loss"))
    trainer.extend(extensions.Evaluator(test_iter, model, device=-1))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy']))
    trainer.extend(extensions.PlotReport(
        ['main/loss', 'validation/main/loss'],
        x_key='epoch', file_name='loss.png'))
    trainer.extend(extensions.PlotReport(
        ['main/accuracy', 'validation/main/accuracy'],
        x_key='epoch', file_name='accuracy.png'))
    trainer.extend(extensions.ProgressBar())

    trainer.run()
    print("Learn END")
def train(args):
    '''RUN TRAINING

    Train a Tacotron2 TTS model (PyTorch model driven by a Chainer
    trainer loop) using the given program arguments.

    :param Namespace args: The program arguments
    '''
    # seed setting
    torch.manual_seed(args.seed)
    # use determinisitic computation or not
    if args.debugmode < 1:
        torch.backends.cudnn.deterministic = False
        logging.info('torch cudnn deterministic is disabled')
    else:
        torch.backends.cudnn.deterministic = True
    # check cuda availability
    if not torch.cuda.is_available():
        logging.warning('cuda is not available')
    # get input and output dimension info from the validation json
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']
    utts = list(valid_json.keys())
    # reverse input and output dimension (TTS: text in, features out)
    idim = int(valid_json[utts[0]]['output'][0]['shape'][1])
    odim = int(valid_json[utts[0]]['input'][0]['shape'][1])
    if args.use_cbhg:
        # second input entry carries the linear spectrogram dimension
        args.spc_dim = int(valid_json[utts[0]]['input'][1]['shape'][1])
    if args.use_speaker_embedding:
        args.spk_embed_dim = int(valid_json[utts[0]]['input'][1]['shape'][0])
    else:
        args.spk_embed_dim = None
    logging.info('#input dims : ' + str(idim))
    logging.info('#output dims: ' + str(odim))
    # write model config
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    model_conf = args.outdir + '/model.json'
    with open(model_conf, 'wb') as f:
        logging.info('writing a model config file to' + model_conf)
        f.write(
            json.dumps((idim, odim, vars(args)),
                       indent=4, sort_keys=True).encode('utf_8'))
    for key in sorted(vars(args).keys()):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))
    # specify model architecture
    tacotron2 = Tacotron2(idim, odim, args)
    logging.info(tacotron2)
    # check the use of multi-gpu
    if args.ngpu > 1:
        tacotron2 = torch.nn.DataParallel(
            tacotron2, device_ids=list(range(args.ngpu)))
        logging.info('batch size is automatically increased (%d -> %d)' % (
            args.batch_size, args.batch_size * args.ngpu))
        args.batch_size *= args.ngpu
    # set torch device
    device = torch.device("cuda" if args.ngpu > 0 else "cpu")
    tacotron2 = tacotron2.to(device)
    # define loss (wraps the model; reporter collects training statistics)
    model = Tacotron2Loss(tacotron2, args.use_masking, args.bce_pos_weight)
    reporter = model.reporter
    # Setup an optimizer
    optimizer = torch.optim.Adam(
        model.parameters(), args.lr, eps=args.eps,
        weight_decay=args.weight_decay)
    # FIXME: TOO DIRTY HACK
    # makes a torch optimizer quack like a chainer one for the trainer
    setattr(optimizer, 'target', reporter)
    setattr(optimizer, 'serialize', lambda s: reporter.serialize(s))
    # Setup a converter
    converter = CustomConverter(True, args.use_speaker_embedding,
                                args.use_cbhg)
    # read json data
    with open(args.train_json, 'rb') as f:
        train_json = json.load(f)['utts']
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']
    # make minibatch list (variable length)
    train_batchset = make_batchset(train_json, args.batch_size,
                                   args.maxlen_in, args.maxlen_out,
                                   args.minibatches, args.batch_sort_key)
    valid_batchset = make_batchset(valid_json, args.batch_size,
                                   args.maxlen_in, args.maxlen_out,
                                   args.minibatches, args.batch_sort_key)
    # hack to make batchsze argument as 1
    # actual bathsize is included in a list
    if args.n_iter_processes > 0:
        train_iter = chainer.iterators.MultiprocessIterator(
            TransformDataset(train_batchset, converter.transform),
            batch_size=1, n_processes=args.n_iter_processes,
            n_prefetch=8, maxtasksperchild=20)
        valid_iter = chainer.iterators.MultiprocessIterator(
            TransformDataset(valid_batchset, converter.transform),
            batch_size=1, repeat=False, shuffle=False,
            n_processes=args.n_iter_processes,
            n_prefetch=8, maxtasksperchild=20)
    else:
        train_iter = chainer.iterators.SerialIterator(
            TransformDataset(train_batchset, converter.transform),
            batch_size=1)
        valid_iter = chainer.iterators.SerialIterator(
            TransformDataset(valid_batchset, converter.transform),
            batch_size=1, repeat=False, shuffle=False)
    # Set up a trainer
    updater = CustomUpdater(model, args.grad_clip, train_iter, optimizer,
                            converter, device)
    trainer = training.Trainer(updater, (args.epochs, 'epoch'),
                               out=args.outdir)
    # Resume from a snapshot
    if args.resume:
        logging.info('resumed from %s' % args.resume)
        torch_resume(args.resume, trainer)
    # Evaluate the model with the test dataset for each epoch
    trainer.extend(
        CustomEvaluator(model, valid_iter, reporter, converter, device))
    # Save snapshot for each epoch
    trainer.extend(torch_snapshot(), trigger=(1, 'epoch'))
    # Save best models (lowest validation loss)
    trainer.extend(
        extensions.snapshot_object(tacotron2, 'model.loss.best',
                                   savefun=torch_save),
        trigger=training.triggers.MinValueTrigger('validation/main/loss'))
    # Save attention figure for each epoch
    if args.num_save_attention > 0:
        # sorted by output length (longest first) for nicer attention plots
        data = sorted(list(valid_json.items())[:args.num_save_attention],
                      key=lambda x: int(x[1]['input'][0]['shape'][1]),
                      reverse=True)
        # DataParallel wraps the model; reach through `.module` if present
        if hasattr(tacotron2, "module"):
            att_vis_fn = tacotron2.module.calculate_all_attentions
        else:
            att_vis_fn = tacotron2.calculate_all_attentions
        trainer.extend(PlotAttentionReport(
            att_vis_fn, data, args.outdir + '/att_ws',
            converter=CustomConverter(False, args.use_speaker_embedding),
            device=device, reverse=True), trigger=(1, 'epoch'))
    # Make a plot for training and validation values
    plot_keys = [
        'main/loss', 'validation/main/loss',
        'main/l1_loss', 'validation/main/l1_loss',
        'main/mse_loss', 'validation/main/mse_loss',
        'main/bce_loss', 'validation/main/bce_loss'
    ]
    trainer.extend(
        extensions.PlotReport(['main/l1_loss', 'validation/main/l1_loss'],
                              'epoch', file_name='l1_loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/mse_loss', 'validation/main/mse_loss'],
                              'epoch', file_name='mse_loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/bce_loss', 'validation/main/bce_loss'],
                              'epoch', file_name='bce_loss.png'))
    if args.use_cbhg:
        plot_keys += [
            'main/cbhg_l1_loss', 'validation/main/cbhg_l1_loss',
            'main/cbhg_mse_loss', 'validation/main/cbhg_mse_loss'
        ]
        trainer.extend(
            extensions.PlotReport(
                ['main/cbhg_l1_loss', 'validation/main/cbhg_l1_loss'],
                'epoch', file_name='cbhg_l1_loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/cbhg_mse_loss', 'validation/main/cbhg_mse_loss'],
                'epoch', file_name='cbhg_mse_loss.png'))
    trainer.extend(
        extensions.PlotReport(plot_keys, 'epoch', file_name='loss.png'))
    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport(trigger=(REPORT_INTERVAL, 'iteration')))
    report_keys = plot_keys[:]
    # prepend bookkeeping columns to the console report
    report_keys[0:0] = ['epoch', 'iteration', 'elapsed_time']
    trainer.extend(extensions.PrintReport(report_keys),
                   trigger=(REPORT_INTERVAL, 'iteration'))
    trainer.extend(extensions.ProgressBar(update_interval=REPORT_INTERVAL))
    # Run the training
    trainer.run()
def main():
    """Train a convnet on the ILSVRC2012 image-label lists from the CLI.

    Builds the selected architecture, optionally restores weights, sets
    up multiprocess data iterators and a MomentumSGD trainer, and runs
    training with periodic evaluation, snapshots and logging.
    """
    # Available architectures (others are commented out in this build).
    archs = {
        # 'alex': alex.Alex,
        # 'alex_fp16': alex.AlexFp16,
        # 'googlenet': googlenet.GoogLeNet,
        # 'googlenetbn': googlenetbn.GoogLeNetBN,
        # 'googlenetbn_fp16': googlenetbn.GoogLeNetBNFp16,
        'nin': nin.NIN,
        # 'resnet50': resnet50.ResNet50
    }

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to training image-label list file')
    parser.add_argument('val', help='Path to validation image-label list file')
    parser.add_argument('--arch', '-a', choices=archs.keys(), default='nin',
                        help='Convnet architecture')
    parser.add_argument('--batchsize', '-B', type=int, default=32,
                        help='Learning minibatch size')
    parser.add_argument('--epoch', '-E', type=int, default=10,
                        help='Number of epochs to train')
    # BUG FIX: help string had an unbalanced parenthesis.
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--initmodel',
                        help='Initialize the model from given file')
    parser.add_argument('--loaderjob', '-j', type=int,
                        help='Number of parallel data loading processes')
    parser.add_argument('--mean', '-m', default='mean.npy',
                        help='Mean file (computed by compute_mean.py)')
    parser.add_argument('--resume', '-r', default='',
                        help='Initialize the trainer from given file')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--root', '-R', default='.',
                        help='Root directory path of image files')
    parser.add_argument('--val_batchsize', '-b', type=int, default=250,
                        help='Validation minibatch size')
    parser.add_argument('--test', action='store_true')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    # Initialize the model to train
    model = archs[args.arch]()
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()  # Make the GPU current
        model.to_gpu()

    # Load the datasets and mean file
    mean = np.load(args.mean)
    train = PreprocessedDataset(args.train, args.root, mean, model.insize)
    val = PreprocessedDataset(args.val, args.root, mean, model.insize, False)
    # These iterators load the images with subprocesses running in parallel to
    # the training/validation.
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, n_processes=args.loaderjob)
    val_iter = chainer.iterators.MultiprocessIterator(
        val, args.val_batchsize, repeat=False, n_processes=args.loaderjob)

    # Set up an optimizer
    optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer.setup(model)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    # --test shrinks the intervals for a quick smoke run.
    val_interval = (500 if args.test else 100000), 'iteration'
    log_interval = (500 if args.test else 1000), 'iteration'

    trainer.extend(extensions.Evaluator(val_iter, model, device=args.gpu),
                   trigger=val_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    # Be careful to pass the interval directly to LogReport
    # (it determines when to emit log rather than when to read observations)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'lr'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
# Trainer-extension wiring for a training script whose model/iterators are
# defined earlier in the file (trainer, test_iter, model, args are assumed
# bound at this point — NOTE(review): confirm against the preceding code).
test_interval = 1, 'epoch'
snapshot_interval = 10, 'epoch'
log_interval = 100, 'iteration'

# Evaluate on the test iterator every epoch (first GPU in args.gpus).
trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpus[0]),
               trigger=test_interval)
# Dump the computational graph rooted at the training loss.
trainer.extend(extensions.dump_graph('main/loss'))
# Periodic full-trainer and model-only snapshots.
trainer.extend(
    extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'),
    trigger=snapshot_interval)
trainer.extend(extensions.snapshot_object(model,
                                          'model_epoch_{.updater.epoch}'),
               trigger=snapshot_interval)
# Logging: JSON log, learning-rate observation, console report.
trainer.extend(extensions.LogReport(trigger=log_interval))
trainer.extend(extensions.observe_lr(), trigger=log_interval)
log_list = [
    'epoch', 'iteration', 'main/loss', 'main/accuracy',
    'validation/main/loss', 'validation/main/accuracy', 'lr', 'elapsed_time'
]
trainer.extend(extensions.PrintReport(log_list), trigger=log_interval)
trainer.extend(extensions.ProgressBar(update_interval=10))

if args.resume:
    # Resume from a snapshot
    chainer.serializers.load_npz(args.resume, trainer)

trainer.run()
def main():
    """Train an LSTM language model with truncated BPTT on lyric data.

    Despite the PTB-style scaffolding, the data actually comes from
    ``lyric_indexes.json`` (pre-indexed token id arrays); the final model
    is evaluated on the held-out test split and its perplexity printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=20,
                        help='Number of examples in each mini-batch')
    parser.add_argument('--bproplen', '-l', type=int, default=35,
                        help='Number of words in each mini-batch '
                             '(= length of truncated BPTT)')
    parser.add_argument('--epoch', '-e', type=int, default=39,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--gradclip', '-c', type=float, default=5,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.set_defaults(test=False)
    parser.add_argument('--unit', '-u', type=int, default=650,
                        help='Number of LSTM units in each layer')
    args = parser.parse_args()

    # Load the Penn Tree Bank long word sequence dataset
    # train, val, test = chainer.datasets.get_ptb_words()
    # NOTE(review): file handle from open() is never closed here.
    data = json.load(open("lyric_indexes.json"))
    train = np.array(data['train'], dtype=np.int32)
    val = np.array(data['val'], dtype=np.int32)
    test = np.array(data['test'], dtype=np.int32)
    n_vocab = data['num_vocab']  # train is just an array of integers
    print('#vocab =', n_vocab)

    if args.test:
        # tiny slices for a quick smoke test
        train = train[:100]
        val = val[:100]
        test = test[:100]

    train_iter = ParallelSequentialIterator(train, args.batchsize)
    val_iter = ParallelSequentialIterator(val, 1, repeat=False)
    test_iter = ParallelSequentialIterator(test, 1, repeat=False)

    # Prepare an RNNLM model
    rnn = RNNForLM(n_vocab, args.unit)
    model = L.Classifier(rnn)
    model.compute_accuracy = False  # we only want the perplexity
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # make the GPU current
        model.to_gpu()

    # Set up an optimizer
    optimizer = chainer.optimizers.SGD(lr=1.0)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip))

    # Set up a trainer
    updater = BPTTUpdater(train_iter, optimizer, args.bproplen, args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    eval_model = model.copy()  # Model with shared params and distinct states
    eval_rnn = eval_model.predictor
    # old-style Chainer train-mode flag; presumably a pre-v2 model — verify
    eval_rnn.train = False
    trainer.extend(extensions.Evaluator(
        val_iter, eval_model, device=args.gpu,
        # Reset the RNN state at the beginning of each evaluation
        eval_hook=lambda _: eval_rnn.reset_state()))

    interval = 10 if args.test else 500
    trainer.extend(extensions.LogReport(postprocess=compute_perplexity,
                                        trigger=(interval, 'iteration')))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'perplexity', 'val_perplexity']
    ), trigger=(interval, 'iteration'))
    trainer.extend(extensions.ProgressBar(
        update_interval=1 if args.test else 10))
    trainer.extend(extensions.snapshot())
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'))
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()

    # Evaluate the final model
    print('test')
    eval_rnn.reset_state()
    evaluator = extensions.Evaluator(test_iter, eval_model, device=args.gpu)
    result = evaluator()
    print('test perplexity:', np.exp(float(result['main/loss'])))
def main():
    """Train Faster R-CNN (VGG16 backbone) on PASCAL VOC 2007.

    Sets up the detection datasets, a train-time transform (rescale +
    random horizontal flip with matching bbox adjustment), a MomentumSGD
    optimizer with weight decay, and a trainer with LR decay, logging,
    plotting and VOC mAP evaluation.
    """
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: Faster R-CNN')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--lr', '-l', type=float, default=1e-3)
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--step_size', '-ss', type=int, default=50000)
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    args = parser.parse_args()

    np.random.seed(args.seed)

    train_data = VOCDetectionDataset(split='trainval', year='2007')
    test_data = VOCDetectionDataset(split='test', year='2007',
                                    use_difficult=True,
                                    return_difficult=True)
    faster_rcnn = FasterRCNNVGG16(
        n_fg_class=len(voc_detection_label_names),
        pretrained_model='imagenet')
    faster_rcnn.use_preset('evaluate')
    model = FasterRCNNTrainChain(faster_rcnn)
    if args.gpu >= 0:
        model.to_gpu(args.gpu)
        chainer.cuda.get_device(args.gpu).use()
    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    def transform(in_data):
        # Rescale the image for the network and resize bboxes to match,
        # then apply a random horizontal flip to both.
        img, bbox, label = in_data
        _, H, W = img.shape
        img = faster_rcnn.prepare(img)
        _, o_H, o_W = img.shape
        scale = o_H / H
        bbox = transforms.resize_bbox(bbox, (W, H), (o_W, o_H))
        # horizontally flip
        img, params = transforms.random_flip(
            img, x_random=True, return_param=True)
        bbox = transforms.flip_bbox(bbox, (o_W, o_H), params['x_flip'])
        return img, bbox, label, scale

    train_data = TransformDataset(train_data, transform)

    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, batch_size=1, n_processes=None, shared_mem=100000000)
    test_iter = chainer.iterators.SerialIterator(
        test_data, batch_size=1, repeat=False, shuffle=False)
    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, device=args.gpu)

    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), out=args.out)

    # final model snapshot at the end of training
    trainer.extend(
        extensions.snapshot_object(model.faster_rcnn, 'snapshot_model.npz'),
        trigger=(args.iteration, 'iteration'))
    # step-decay the learning rate by 10x at step_size
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=(args.step_size, 'iteration'))

    log_interval = 20, 'iteration'
    plot_interval = 3000, 'iteration'
    print_interval = 20, 'iteration'

    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=log_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport(
        ['iteration', 'epoch', 'elapsed_time', 'lr',
         'main/loss',
         'main/roi_loc_loss',
         'main/roi_cls_loss',
         'main/rpn_loc_loss',
         'main/rpn_cls_loss',
         'validation/main/map',
         ]), trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss'],
                file_name='loss.png', trigger=plot_interval
            ),
            trigger=plot_interval
        )

    # Evaluate VOC mAP at the LR-decay point and at the end of training.
    # NOTE(review): invoke_before_training is a deprecated kwarg in newer
    # Chainer versions — confirm the installed version accepts it.
    trainer.extend(
        DetectionVOCEvaluator(
            test_iter, model.faster_rcnn, use_07_metric=True),
        trigger=ManualScheduleTrigger(
            (args.step_size, args.iteration), 'iteration'),
        invoke_before_training=False)

    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def train(args):
    """Train with the given args

    Trains an RNN language model on tokenized text, with optional
    SortaGrad scheduling, early stopping, TensorBoard logging, and a
    final test-set perplexity evaluation.

    :param Namespace args: The program arguments
    """
    # display chainer version
    logging.info('chainer version = ' + chainer.__version__)

    set_deterministic_chainer(args)

    # check cuda and cudnn availability
    if not chainer.cuda.available:
        logging.warning('cuda is not available')
    if not chainer.cuda.cudnn_enabled:
        logging.warning('cudnn is not available')

    # get special label ids
    unk = args.char_list_dict['<unk>']
    eos = args.char_list_dict['<eos>']
    # read tokens as a sequence of sentences
    train = read_tokens(args.train_label, args.char_list_dict)
    val = read_tokens(args.valid_label, args.char_list_dict)
    # count tokens
    n_train_tokens, n_train_oovs = count_tokens(train, unk)
    n_val_tokens, n_val_oovs = count_tokens(val, unk)
    logging.info('#vocab = ' + str(args.n_vocab))
    logging.info('#sentences in the training data = ' + str(len(train)))
    logging.info('#tokens in the training data = ' + str(n_train_tokens))
    logging.info('oov rate in the training data = %.2f %%' %
                 (n_train_oovs / n_train_tokens * 100))
    logging.info('#sentences in the validation data = ' + str(len(val)))
    logging.info('#tokens in the validation data = ' + str(n_val_tokens))
    logging.info('oov rate in the validation data = %.2f %%' %
                 (n_val_oovs / n_val_tokens * 100))

    use_sortagrad = args.sortagrad == -1 or args.sortagrad > 0
    # Create the dataset iterators (eos doubles as the sos symbol)
    train_iter = ParallelSentenceIterator(train, args.batchsize,
                                          max_length=args.maxlen,
                                          sos=eos, eos=eos,
                                          shuffle=not use_sortagrad)
    val_iter = ParallelSentenceIterator(val, args.batchsize,
                                        max_length=args.maxlen,
                                        sos=eos, eos=eos, repeat=False)
    logging.info('#iterations per epoch = ' +
                 str(len(train_iter.batch_indices)))
    logging.info('#total iterations = ' +
                 str(args.epoch * len(train_iter.batch_indices)))
    # Prepare an RNNLM model
    rnn = RNNLM(args.n_vocab, args.layer, args.unit, args.type)
    model = ClassifierWithState(rnn)
    if args.ngpu > 1:
        logging.warning(
            "currently, multi-gpu is not supported. use single gpu.")
    if args.ngpu > 0:
        # Make the specified GPU current
        gpu_id = 0
        chainer.cuda.get_device_from_id(gpu_id).use()
        model.to_gpu()
    else:
        gpu_id = -1

    # Save model conf to json
    model_conf = args.outdir + '/model.json'
    with open(model_conf, 'wb') as f:
        logging.info('writing a model config file to ' + model_conf)
        f.write(json.dumps(vars(args), indent=4, ensure_ascii=False,
                           sort_keys=True).encode('utf_8'))

    # Set up an optimizer
    # NOTE(review): any other value of args.opt leaves `optimizer` unbound
    # (NameError) — presumably argparse restricts the choices; verify.
    if args.opt == 'sgd':
        optimizer = chainer.optimizers.SGD(lr=1.0)
    elif args.opt == 'adam':
        optimizer = chainer.optimizers.Adam()

    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip))

    updater = BPTTUpdater(train_iter, optimizer, gpu_id)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'),
                               out=args.outdir)
    trainer.extend(LMEvaluator(val_iter, model, device=gpu_id))
    trainer.extend(extensions.LogReport(
        postprocess=compute_perplexity,
        trigger=(args.report_interval_iters, 'iteration')))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'perplexity', 'val_perplexity',
         'elapsed_time']),
        trigger=(args.report_interval_iters, 'iteration'))
    trainer.extend(
        extensions.ProgressBar(update_interval=args.report_interval_iters))
    trainer.extend(
        extensions.snapshot(filename='snapshot.ep.{.updater.epoch}'))
    trainer.extend(
        extensions.snapshot_object(model, 'rnnlm.model.{.updater.epoch}'))
    # MEMO(Hori): wants to use MinValueTrigger, but it seems to fail in resuming
    trainer.extend(
        MakeSymlinkToBestModel('validation/main/loss', 'rnnlm.model'))

    if use_sortagrad:
        # switch from length-sorted to shuffled batches after N epochs
        trainer.extend(
            ShufflingEnabler([train_iter]),
            trigger=(args.sortagrad if args.sortagrad != -1
                     else args.epoch, 'epoch'))

    if args.resume:
        logging.info('resumed from %s' % args.resume)
        chainer.serializers.load_npz(args.resume, trainer)

    set_early_stop(trainer, args, is_lm=True)
    if args.tensorboard_dir is not None and args.tensorboard_dir != "":
        writer = SummaryWriter(args.tensorboard_dir)
        trainer.extend(TensorboardLogger(writer),
                       trigger=(args.report_interval_iters, 'iteration'))

    trainer.run()
    check_early_stop(trainer, args.epoch)

    # compute perplexity for test set
    if args.test_label:
        logging.info('test the best model')
        chainer.serializers.load_npz(args.outdir + '/rnnlm.model.best', model)
        test = read_tokens(args.test_label, args.char_list_dict)
        n_test_tokens, n_test_oovs = count_tokens(test, unk)
        logging.info('#sentences in the test data = ' + str(len(test)))
        logging.info('#tokens in the test data = ' + str(n_test_tokens))
        logging.info('oov rate in the test data = %.2f %%' %
                     (n_test_oovs / n_test_tokens * 100))
        test_iter = ParallelSentenceIterator(test, args.batchsize,
                                             max_length=args.maxlen,
                                             sos=eos, eos=eos, repeat=False)
        evaluator = LMEvaluator(test_iter, model, device=gpu_id)
        with chainer.using_config('train', False):
            result = evaluator()
        logging.info('test perplexity: ' +
                     str(np.exp(float(result['main/loss']))))
# Top-level MNIST GAN training fragment.
# NOTE(review): G, D, train_iter, latent_size, num_epochs and device_id are
# assumed to be defined earlier in the file — confirm against the full script.
G.to_gpu(0)
d_optimizer = chainer.optimizers.Adam(alpha=0.00002)
d_optimizer.setup(D)
g_optimizer = chainer.optimizers.Adam(alpha=0.00002)
g_optimizer.setup(G)

updater = GANUpdater(train_iter, D, G, d_optimizer, g_optimizer, latent_size,
                     device=device_id)
trainer = training.Trainer(updater, stop_trigger=(num_epochs, 'epoch'),
                           out='mnist_result')
trainer.extend(extensions.LogReport())
trainer.extend(
    extensions.PrintReport(['epoch', 'd_loss', 'g_loss', 'elapsed_time']))
trainer.run()

# After training, sample 10 latent vectors and display the generated digits.
z = Variable(np.random.randn(10, 64).astype(np.float32))
z.to_gpu(0)
fake_images = G(z)
fake_images.to_cpu()
for i in range(10):
    plt.imshow(fake_images.data[i].reshape([28, 28]))
    plt.show()
def main():
    """Train a binary classifier on imbalanced MNIST from the CLI.

    Supports three updater strategies (standard / proposed / LRE), two
    optimizers, optional ROC-AUC evaluation, trainer snapshots, resume,
    and pickling of the trained classifier.
    """
    parser = argparse.ArgumentParser(
        description='Imbalanced MNIST classification')
    parser.add_argument('--eval-mode', type=int, default=1,
                        help='Evaluation mode.'
                        '0: only binary_accuracy is calculated.'
                        '1: binary_accuracy and ROC-AUC score is calculated')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='batch size')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID to use. Negative value indicates '
                        'not to use GPU and to run the code in CPU.')
    parser.add_argument('--out', '-o', type=str, default='result',
                        help='path to output directory')
    parser.add_argument('--epoch', '-e', type=int, default=10,
                        help='number of epochs')
    parser.add_argument('--resume', '-r', type=str, default='',
                        help='path to a trainer snapshot')
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--protocol', type=int, default=2,
                        help='protocol version for pickle')
    parser.add_argument('--model-filename', type=str,
                        default='classifier.pkl',
                        help='file name for pickled model')
    parser.add_argument('--updater-type', type=str, default='standard')
    parser.add_argument('--sampling-size', type=int, default=32)
    parser.add_argument('--optimizer-type', type=str, default='Adam')
    # BUG FIX: --alpha was parsed as str; it is passed to the LRE updater
    # as a numeric hyperparameter, so parse it as float (CLI usage like
    # `--alpha 0.001` is unchanged).
    parser.add_argument('--alpha', type=float, default=0.001)
    args = parser.parse_args()

    # Dataset preparation
    train, train_val, val = get_binary_imbalanced_data()

    train_iter = iterators.SerialIterator(train, args.batchsize)
    val_iter = iterators.SerialIterator(val, args.batchsize,
                                        repeat=False, shuffle=False)

    model = LeNet(n_class=1, binary=True)
    classifier = Classifier(model, lossfun=F.sigmoid_cross_entropy,
                            metrics_fun=F.binary_accuracy, device=args.gpu)

    if args.optimizer_type == 'Adam':
        optimizer = optimizers.Adam()
    else:
        optimizer = optimizers.SGD(lr=1e-3)
    optimizer.setup(classifier)

    updater_type = args.updater_type
    if updater_type == 'standard':
        updater = training.StandardUpdater(train_iter, optimizer,
                                           device=args.gpu)
    elif updater_type == 'proposed':
        updater = Proposed(train_iter, optimizer, device=args.gpu,
                           sampling_size=args.sampling_size)
    elif updater_type == 'LRE':
        # whole train_val set in one batch for the meta-learning step
        train_val_iter = iterators.SerialIterator(train_val, len(train_val))
        updater = LRE({'main': train_iter, 'val': train_val_iter},
                      optimizer, device=args.gpu, alpha=args.alpha)
    else:
        # BUG FIX: an unknown type previously fell through and crashed
        # later with NameError on `updater`.
        raise ValueError('Invalid updater-type {}'.format(updater_type))

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(E.Evaluator(val_iter, classifier, device=args.gpu))
    trainer.extend(E.LogReport())

    eval_mode = args.eval_mode
    if eval_mode == 0:
        trainer.extend(
            E.PrintReport([
                'epoch', 'main/loss', 'main/accuracy',
                'validation/main/loss', 'validation/main/accuracy',
                'elapsed_time'
            ]))
    elif eval_mode == 1:
        train_eval_iter = iterators.SerialIterator(train, args.batchsize,
                                                   repeat=False,
                                                   shuffle=False)
        trainer.extend(
            ROCAUCEvaluator(train_eval_iter, classifier, eval_func=model,
                            device=args.gpu, name='train', pos_labels=1,
                            ignore_labels=-1, raise_value_error=False))
        # extension name='validation' is already used by `Evaluator`,
        # instead extension name `val` is used.
        trainer.extend(
            ROCAUCEvaluator(val_iter, classifier, eval_func=model,
                            device=args.gpu, name='val', pos_labels=1,
                            ignore_labels=-1))
        trainer.extend(
            E.PrintReport([
                'epoch', 'main/loss', 'main/accuracy',
                'train/main/roc_auc', 'validation/main/loss',
                'validation/main/accuracy', 'val/main/roc_auc',
                'elapsed_time'
            ]))
    else:
        raise ValueError('Invalid accfun_mode {}'.format(eval_mode))

    trainer.extend(E.ProgressBar(update_interval=10))

    frequency = args.epoch if args.frequency == -1 else max(1, args.frequency)
    trainer.extend(E.snapshot(), trigger=(frequency, 'epoch'))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()

    classifier.save_pickle(os.path.join(args.out, args.model_filename),
                           protocol=args.protocol)
def main():
    """Train an iMaterialist Challenge classifier with Chainer.

    Parses options (help texts kept in the original Japanese), builds the
    selected model, splits the photo ids 80/20 into train/validation
    sets, and runs the trainer.  With both --stream and --parallel the
    images are downloaded in background threads while training runs.
    """
    # Command-line options
    parser = argparse.ArgumentParser(description='iMaterialist_Challenge:')
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='1バッチあたり何枚か')
    parser.add_argument('--epoch', '-e', type=int, default=10,
                        help='何epochやるか')
    parser.add_argument('--out', '-o', default='result',
                        help='結果を出力するディレクトリ')
    parser.add_argument('--resume', '-r', default='',
                        help='指定したsnapshopから継続して学習します')
    parser.add_argument('--frequency', '-f', type=int, default=1,
                        help='指定したepotchごとに重みを保存します')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='使うGPUの番号')
    parser.add_argument('--size', '-s', type=int, default=256,
                        help='正規化する時の一辺のpx')
    parser.add_argument('--label_variety', type=int, default=228,
                        help='確認できたlabelの総数 この中で判断する')
    parser.add_argument('--total_photo_num', '-n', type=int, default=-1,
                        help='使用する写真データの数')  # (9815, 39269)
    parser.add_argument('--object', type=str, default='train',
                        help='train or test のどちらか選んだ方のデータを使用する')
    parser.add_argument('--cleanup', '-c', dest='cleanup',
                        action='store_false',
                        help='付与すると 邪魔な画像を取り除き trashディレクトリに移動させる機能を停止させます')
    parser.add_argument('--interval', '-i', type=int, default=10,
                        help='何iteraionごとに画面に出力するか')
    parser.add_argument('--model', '-m', type=int, default=0,
                        help='使うモデルの種類')
    parser.add_argument('--lossfunc', '-l', type=int, default=0,
                        help='使うlossの種類')
    parser.add_argument('--stream', '-d', dest='stream',
                        action='store_true',
                        help='画像のダウンロードを同時に行う')
    # BUG FIX: dest was 'douji', but the code below reads args.parallel,
    # which raised AttributeError whenever --stream was given.
    parser.add_argument('--parallel', '-p', dest='parallel',
                        action='store_true',
                        help='画像ダウンロードを並列処理するか')
    args = parser.parse_args()
    # args.model = -1
    # args.batchsize = 8
    args.size = 224  # forced override of --size (kept from the original)
    # args.interval = 1
    # args.cleanup = False
    # args.lossfunc = 3
    # args.stream = True
    # args.total_photo_num = 200
    # args.resume = 'serverresult/snapshot_iter_74'

    # '_lite' variants assume half the default input size.
    # RES_SPP_net was abandoned: variable-size inputs were hard to
    # implement in Chainer.
    model_name = {
        0: 'ResNet',
        1: 'ResNet_lite',
        2: 'Bottle_neck_RES_net',
        3: 'Bottle_neck_RES_net_lite',
        4: 'Mymodel',
        5: 'RES_SPP_net',
        6: 'VGGTrans',
        7: 'RESNetTrans',
        8: 'Lite',
    }[args.model]

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('# model: {}'.format(model_name))
    print('# size: {}'.format(args.size))
    print('')

    # Build the model
    model = getattr(mymodel, model_name)(args.label_variety, args.lossfunc)

    # Move to GPU if one is available and requested
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Optimizer setup
    optimizer = chainer.optimizers.Adam()
    # optimizer = chainer.optimizers.MomentumSGD(0.1, 0.9)
    # https://arxiv.org/pdf/1605.07146.pdf
    # chainer.optimizer.WeightDecay(0.0005)
    optimizer.setup(model)

    # Dataset setup: keep 20% aside for validation
    photo_nums = photos(args)
    train, val = chainer.datasets.split_dataset_random(
        photo_nums, int(len(photo_nums) * 0.8), seed=0)
    trans = Transform(args, photo_nums, True,
                      False if args.model == 5 else True)
    train = chainer.datasets.TransformDataset(train, trans)
    val = chainer.datasets.TransformDataset(val, trans)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    val_iter = chainer.iterators.SerialIterator(val, args.batchsize,
                                                repeat=False, shuffle=False)

    # How long to train
    stop_trigger = (args.epoch, 'epoch')
    # if args.early_stopping:  # pointless when the optimizer is Adam
    #     stop_trigger = training.triggers.EarlyStoppingTrigger(
    #         monitor=args.early_stopping, verbose=True,
    #         max_trigger=(args.epoch, 'epoch'))

    # Updater / trainer setup
    updater = training.updaters.StandardUpdater(train_iter, optimizer,
                                                device=args.gpu,
                                                loss_func=model.loss_func)
    trainer = training.Trainer(updater, stop_trigger, out=args.out)

    # Per-epoch evaluation on the validation data
    evaluator = MyEvaluator(val_iter, model, device=args.gpu,
                            eval_func=model.loss_func)
    evaluator.trigger = 1, 'epoch'
    trainer.extend(evaluator)

    # Transfer-learning models (VGGTrans / RESNetTrans): freeze the base.
    if args.model == 6 or args.model == 7:
        model.base.disable_update()

    # Dump the computational graph as a dot file
    trainer.extend(extensions.dump_graph('main/loss'))

    # Save a snapshot (training state) every `frequency` epochs
    frequency = args.epoch if args.frequency == -1 else max(1, args.frequency)
    trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))

    # How often training metrics are logged
    logreport = extensions.LogReport(trigger=(args.interval, 'iteration'))
    trainer.extend(logreport)

    # Plot metrics for each dataset
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'val/loss'], 'iteration',
                                  trigger=(5, 'iteration'),
                                  file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(['main/acc', 'val/acc'], 'iteration',
                                  trigger=(5, 'iteration'),
                                  file_name='accuracy.png'))
        trainer.extend(
            extensions.PlotReport(['main/freq_err', 'val/freq_err'],
                                  'iteration', trigger=(5, 'iteration'),
                                  file_name='frequent_error.png'))
        trainer.extend(
            extensions.PlotReport(['main/acc2', 'val/acc2'], 'iteration',
                                  trigger=(5, 'iteration'),
                                  file_name='accuracy2.png'))
        trainer.extend(
            extensions.PlotReport(['main/f1', 'val/f1'], 'iteration',
                                  trigger=(5, 'iteration'),
                                  file_name='f1.png'))

    # Which metric columns are printed
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'iteration', 'main/loss', 'val/loss', 'main/acc',
            'main/acc2', 'val/acc2', 'main/precision', 'main/recall',
            'main/f1', 'val/f1', 'main/labelnum', 'main/fpk',
            'elapsed_time'
        ]))
    # ['epoch', 'iteration', 'main/loss', 'val/loss','main/acc', 'val/acc', 'main/acc2', 'val/acc2',
    #  'main/precision', 'main/recall', 'main/f1', 'val/f1', 'main/labelnum', 'main/tpk', 'elapsed_time']))

    # Progress bar
    trainer.extend(extensions.ProgressBar(update_interval=args.interval))
    # trainer.extend(MyShift("lr", 1 / 5, logreport, 0.1))

    # Resume from saved weights.  NOTE(original author): loading fails
    # without the explicit path= argument; reason unknown.
    if args.resume:
        chainer.serializers.load_npz(args.resume, model,
                                     path='updater/model:main/')

    # Run training, optionally downloading images in the background
    if args.stream and args.parallel:
        import concurrent.futures
        executor = concurrent.futures.ThreadPoolExecutor(max_workers=2)
        executor.submit(trainer.run)

        def train_download():
            # Fetch training images in iteration order.
            for num in [train_iter.dataset._dataset[i]
                        for i in train_iter._order]:
                trans.download(-num)
                time.sleep(0.01)

        def val_download():
            # BUG FIX: this previously walked train_iter's dataset via a
            # `_start` attribute TransformDataset does not define; fetch
            # the validation images through val_iter instead.
            for num in val_iter.dataset._dataset:
                trans.download(-num)
                time.sleep(0.01)

        executor.submit(train_download)
        executor.submit(val_download)
    else:
        trainer.run()