def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", default=None)
    parser.add_argument("--gpu", type=int, default=0)
    parser.add_argument("--batch_size", type=int, default=4)
    parser.add_argument("--data_dir", type=str, default="./datasets")
    parser.add_argument("--data_list", type=str, default="train.txt")
    parser.add_argument("--n_class", type=int, default=5)
    parser.add_argument("--n_steps", type=int, default=100)
    parser.add_argument("--snapshot_dir", type=str, default="./snapshots")
    parser.add_argument("--save_steps", type=int, default=50)
    args = parser.parse_args()
    print(args)

    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)

    model = RefineResNet(n_class=args.n_class)
    if args.model is not None:
        serializers.load_npz(args.model, model)
    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()
        xp = cuda.cupy
    else:
        xp = np

    optimizer = Adam()
    # optimizer = MomentumSGD()
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(1e-5), "hook_wd")

    train_dataset = ImageDataset(args.data_dir, args.data_list, crop_size=(320, 320))
    train_iterator = MultiprocessIterator(train_dataset, batch_size=args.batch_size,
                                          repeat=True, shuffle=True)

    step = 0
    for zipped_batch in train_iterator:
        step += 1
        x = Variable(xp.array([zipped[0] for zipped in zipped_batch]))
        y = Variable(xp.array([zipped[1] for zipped in zipped_batch], dtype=xp.int32))
        # Keep the prediction as a Variable so the loss stays connected to the model's graph.
        pred = model(x)
        loss = F.softmax_cross_entropy(pred, y)
        model.cleargrads()
        loss.backward()
        optimizer.update()
        print("Step: {}, Loss: {}".format(step, loss.data))

        if step % args.save_steps == 0:
            serializers.save_npz(
                os.path.join(args.snapshot_dir, "model_{}.npz".format(step)), model)
        if step >= args.n_steps:
            break
def train(args):
    model = PeepHoleJaLSTMParser(args.model, args.word_emb_size, args.char_emb_size,
                                 args.nlayers, args.hidden_dim, args.relu_dim,
                                 args.dep_dim, args.dropout_ratio)
    with open(args.model + "/params", "w") as f:
        log(args, f)

    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    if args.pretrained:
        print('Load pretrained word embeddings from', args.pretrained)
        model.load_pretrained_embeddings(args.pretrained)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    converter = lambda x, device: concat_examples(x, device=device, padding=-1)

    train = LSTMParserDataset(args.model, args.train)
    train_iter = SerialIterator(train, args.batchsize)
    val = LSTMParserDataset(args.model, args.val)
    val_iter = SerialIterator(val, args.batchsize, repeat=False, shuffle=False)

    optimizer = chainer.optimizers.Adam(beta2=0.9)
    # optimizer = chainer.optimizers.MomentumSGD(momentum=0.7)
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(1e-6))
    # optimizer.add_hook(GradientClipping(5.))

    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=args.gpu, converter=converter)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.model)

    val_interval = 2000, 'iteration'
    log_interval = 200, 'iteration'

    eval_model = model.copy()
    eval_model.train = False

    trainer.extend(extensions.Evaluator(val_iter, eval_model, converter, device=args.gpu),
                   trigger=val_interval)
    trainer.extend(extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'),
                   trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration',
        'main/tagging_loss', 'main/tagging_accuracy',
        'main/parsing_loss', 'main/parsing_accuracy',
        'validation/main/tagging_accuracy', 'validation/main/parsing_accuracy'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
def train(args):
    model = LSTMTagger(args.model, args.word_emb_size, args.char_emb_size,
                       args.nlayers, args.hidden_dim, args.relu_dim, args.dropout_ratio)
    with open(args.model + "/params", "w") as f:
        log(args, f)

    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    if args.pretrained:
        print('Load pretrained word embeddings from', args.pretrained)
        model.load_pretrained_embeddings(args.pretrained)

    train = LSTMTaggerDataset(args.model, args.train)
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    val = LSTMTaggerDataset(args.model, args.val)
    val_iter = chainer.iterators.SerialIterator(val, args.batchsize,
                                                repeat=False, shuffle=False)

    optimizer = chainer.optimizers.MomentumSGD(momentum=0.7)
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(1e-6))
    # optimizer.add_hook(GradientClipping(5.))

    updater = training.StandardUpdater(train_iter, optimizer, converter=converter)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.model)

    val_interval = 1000, 'iteration'
    log_interval = 200, 'iteration'

    eval_model = model.copy()
    eval_model.train = False

    trainer.extend(extensions.Evaluator(val_iter, eval_model, converter),
                   trigger=val_interval)
    trainer.extend(extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'),
                   trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration',
        'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy',
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
def apply_weightdecay_only_w(model, rate):
    """Use after setting up the optimizer."""
    if hasattr(model, 'params'):
        for p in model.params():
            if p.name == 'W':
                if hasattr(p.update_rule.hyperparam, 'weight_decay_rate'):
                    p.update_rule.hyperparam.weight_decay_rate = rate
                else:
                    p.update_rule.add_hook(WeightDecay(rate))
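# A minimal usage sketch for the helper above. The model here is hypothetical (any
# chainer.Chain with 'W'/'b' parameters works); the optimizer must be set up first so
# every parameter already has an update rule the hook can attach to.
import chainer
from chainer import optimizers

model = chainer.links.Classifier(chainer.links.Linear(784, 10))
optimizer = optimizers.Adam()
optimizer.setup(model)
apply_weightdecay_only_w(model, 1e-4)  # decay only the 'W' parameters, leave biases alone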
def __call__(self, model, train, test, out_dir, optname='SGD', lr=1.0, rate=0.9,
             weighting=False, gpu=-1, bsize=64, test_bsize=10, esize=50,
             mname=None, progress=True, lr_attr='lr', l2=0.0,
             keys=['main/loss', 'validation/main/loss',
                   'main/accuracy', 'validation/main/accuracy',
                   'validation_in_mca/main/mca'],
             s_keys=['validation/main/loss', 'validation/main/accuracy',
                     'validation_in_mca/main/mca'],
             p_keys=['epoch', 'main/loss', 'validation/main/loss',
                     'main/accuracy', 'validation/main/accuracy',
                     'validation_in_mca/main/mca', 'elapsed_time']):
    train_iter = custom_iterator(train, batch_size=bsize)
    test_iter = custom_iterator(test, batch_size=test_bsize, repeat=False, shuffle=False)

    if weighting:
        label_cnt = train_iter.get_label_cnt()
        n_cls = len(label_cnt.keys())
        cls_weight = numpy.empty(n_cls)
        for k, cnt in six.iteritems(label_cnt):
            cls_weight[k] = cnt
        cls_weight = (cls_weight.sum() / cls_weight / n_cls).astype(numpy.float32)
    else:
        cls_weight = None

    if gpu >= 0:
        cuda.get_device(gpu).use()
        model.to_gpu()
        if cls_weight is not None:
            cls_weight = cuda.to_gpu(cls_weight)
    model.cls_weight = cls_weight

    optimizer = self.optimizers[optname](lr)
    optimizer.setup(model)
    if l2 > 0:
        optimizer.add_hook(WeightDecay(l2))

    updater = training.StandardUpdater(train_iter, optimizer, device=gpu)
    trainer = training.Trainer(updater, (esize, 'epoch'), out=out_dir)
    trainer.extend(testmode_evaluator(test_iter, model, device=gpu))
    trainer.extend(mca_evaluator(test_iter, model, device=gpu))
    trainer.extend(extensions.LogReport())
    trainer.extend(scale_lr(attr=lr_attr, rate=rate))
    trainer.extend(best_scoring(model, keys, s_keys=s_keys, mname=mname))
    if progress:
        trainer.extend(extensions.PrintReport(p_keys))
        trainer.extend(extensions.ProgressBar())
    trainer.run()
    return model.predictor.best_score
def _setup_optimizer(config, model, comm):
    optimizer_name = config['optimizer']
    lr = float(config['init_lr'])
    weight_decay = float(config['weight_decay'])
    if optimizer_name == 'Adam':
        optimizer = Adam(alpha=lr, weight_decay_rate=weight_decay)
    elif optimizer_name in ('SGD', 'MomentumSGD', 'CorrectedMomentumSGD', 'RMSprop'):
        optimizer = eval(optimizer_name)(lr=lr)
        if weight_decay > 0.:
            optimizer.add_hook(WeightDecay(weight_decay))
    else:
        raise ValueError('Invalid optimizer: {}'.format(optimizer_name))
    if comm is not None:
        optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    optimizer.setup(model)
    return optimizer
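# An illustrative `config` mapping for the helper above; only these three keys are read,
# and the values are assumptions for the sketch, not defaults from the original project.
config = {
    'optimizer': 'MomentumSGD',  # Adam / SGD / MomentumSGD / CorrectedMomentumSGD / RMSprop
    'init_lr': '0.01',           # parsed with float()
    'weight_decay': '1e-4',      # Adam uses weight_decay_rate; the others get a WeightDecay hook
}
# optimizer = _setup_optimizer(config, model, comm=None)  # comm=None skips ChainerMN wrapping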
def train(self, trainsents, testsents, parserfile):
    classifier = L.Classifier(self.model)
    if self.gpu:
        cuda.get_device().use()
        classifier = classifier.to_gpu()
    trainexamples = self.gen_trainexamples(trainsents)

    optimizer = O.AdaGrad(.01, 1e-6)
    optimizer.setup(classifier)
    optimizer.add_hook(WeightDecay(1e-8))
    # optimizer = O.MomentumSGD(.05, .9)
    # optimizer.setup(classifier)
    # optimizer.add_hook(WeightDecay(1e-4))
    # optimizer.add_hook(ExponentialMovingAverage())

    best_uas = 0.
    print >> sys.stderr, "will run {} iterations".format(self.niters)
    for step in range(1, self.niters + 1):
        batch = random.sample(trainexamples, self.batchsize)
        t = Variable(xp.concatenate(map(lambda ex: ex.target, batch)))
        optimizer.update(classifier, batch, t)
        if type(self.model) == WeightAveragedFF:
            self.model.update_averaged(step)
        print >> sys.stderr, "Epoch:{}\tloss:{}\tacc:{}".format(
            step, classifier.loss.data, classifier.accuracy.data)
        if step % self.evaliter == 0:
            print >> sys.stderr, "Evaluating model on dev data..."
            res = self(testsents)
            uas, las = accuracy(res)
            if uas > best_uas:
                print >> sys.stderr, "Best score. Saving parser...",
                self.save(parserfile)
                best_uas = uas
                print >> sys.stderr, "done"
    self.save(parserfile + ".final")
    print >> sys.stderr, "done"
def init_optimizer(self, opt_cfg):
    print("Setting up optimizer")
    if opt_cfg['type'] == _ADAM:
        print("using ADAM")
        self.optimizer = optimizers.Adam(alpha=opt_cfg['lr'], beta1=0.9,
                                         beta2=0.999, eps=1e-08, amsgrad=True)
    else:
        print("using SGD")
        self.optimizer = optimizers.SGD(lr=opt_cfg['lr'])
    print("learning rate: {0:f}".format(opt_cfg['lr']))

    # Attach optimizer
    self.optimizer.setup(self.model)

    # Add Weight decay
    if opt_cfg['l2'] > 0:
        print("Adding WeightDecay: {0:f}".format(opt_cfg['l2']))
        self.optimizer.add_hook(WeightDecay(opt_cfg['l2']))

    # Gradient clipping
    print("Clipping gradients at: {0:d}".format(opt_cfg['grad_clip']))
    self.optimizer.add_hook(GradientClipping(threshold=opt_cfg['grad_clip']))

    # Gradient noise
    if opt_cfg['grad_noise_eta'] > 0:
        print("Adding gradient noise: {0:f}".format(opt_cfg['grad_noise_eta']))
        self.optimizer.add_hook(
            chainer.optimizer.GradientNoise(eta=opt_cfg['grad_noise_eta']))

    # Freeze weights
    for l in opt_cfg['freeze']:
        if l in self.model.__dict__:
            print("freezing: {0:s}".format(l))
            self.model[l].disable_update()
        else:
            print("layer {0:s} not in model".format(l))
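# An opt_cfg dictionary shaped the way init_optimizer above expects it. The values are
# illustrative, and _ADAM is whatever constant the surrounding module compares against.
opt_cfg = {
    'type': 'adam',         # compared to _ADAM to pick Adam, otherwise SGD
    'lr': 1e-3,
    'l2': 1e-5,             # 0 disables the WeightDecay hook
    'grad_clip': 5,         # GradientClipping threshold (printed with {0:d}, so keep it an int)
    'grad_noise_eta': 0.0,  # 0 disables GradientNoise
    'freeze': [],           # names of sub-links to exclude from updates
}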
from config.parseval_svhn.base import *
from src.model.classifier import LMTraining
from chainer.optimizer import WeightDecay

mode = ['default']
model = LMTraining(predictor, preprocess)
hook = [WeightDecay(5e-4)]
def training(args):
    source = EventField(fix_length=args.event_size, embed_size=args.src_embed)
    mask_flag = 'tmpl' in args.net
    sentence_size = args.sentence_size if args.truncate else None
    reverse_decode = args.reverse_decode
    if 'disc' in args.net:
        target = TextAndContentWordField(
            start_token=None, fix_length=sentence_size, mask_player=mask_flag,
            mask_team=mask_flag, numbering=args.numbering, reverse=reverse_decode,
            bpc=args.bpc, multi_tag=args.multi_tag)
    else:
        target = TextField(
            start_token=None, fix_length=sentence_size, mask_player=mask_flag,
            mask_team=mask_flag, numbering=args.numbering, reverse=reverse_decode,
            bpc=args.bpc, multi_tag=args.multi_tag)
    if args.truncate:
        train = OptaDataset(path=args.dataset + '.train',
                            fields={'source': source, 'target': target})
    else:
        train = OptaDataset(path=args.dataset + '.train',
                            fields={'source': source, 'target': target},
                            limit_length=args.limit)
    source.build_vocabulary(train.source)
    target.build_vocabulary(train.target, size=args.vocab_size)
    target.player_to_id = source.player_to_id
    target.players = source.id_to_player
    if mask_flag or 'disc' in args.net:
        content_word_to_id = getattr(target, 'content_word_to_id', None)
        target_test = TestTextField(
            source.id_to_player, source.id_to_team, target.word_to_id,
            content_word_to_id, target.unk_id, fix_length=None, bpc=args.bpc)
    else:
        target_test = TextField(start_token=None, end_token=None,
                                fix_length=None, bpc=args.bpc)
        target_test.word_to_id = target.word_to_id
        target_test.id_to_word = target.id_to_word
        target_test.unk_id = target.unk_id
    dev = OptaDataset(path=args.dataset + '.dev',
                      fields={'source': source, 'target': target_test},
                      limit_length=args.limit)
    train2 = OptaDataset(path=args.dataset + '.train',
                         fields={'source': source, 'target': target_test},
                         limit_length=args.limit)
    test = OptaDataset(path=args.dataset + '.test',
                       fields={'source': source, 'target': target_test})
    test20 = OptaDataset(path=args.dataset + '.test',
                         fields={'source': source, 'target': target_test}, limit_length=20)
    test15 = OptaDataset(path=args.dataset + '.test',
                         fields={'source': source, 'target': target_test}, limit_length=15)
    test10 = OptaDataset(path=args.dataset + '.test',
                         fields={'source': source, 'target': target_test}, limit_length=10)

    start_id, end_id = target.word_to_id['<s>'], target.word_to_id['</s>']
    class_weight = compute_class_weight('./dataset/player_list.txt', target.word_to_id,
                                        args.class_weight[0], args.class_weight[1],
                                        gpu=args.gpu)

    dirname = Utility.get_save_directory(
        args.net, './debug' if args.debug else args.output)
    if args.debug:
        save_path = os.path.join('./debug', dirname)
    else:
        save_path = os.path.join(args.output, dirname)
    Utility.make_directory(save_path)
    del args.vocab_size
    setting = {
        'vocab_size': len(target.word_to_id),
        'type_size': len(source.type_to_id),
        'player_size': len(source.player_to_id),
        'team_size': len(source.team_to_id),
        'detail_size': len(source.detail_to_id),
        'detail_dim': source.details_dimention,
        'start_id': start_id,
        'end_id': end_id,
        'unk_id': target.unk_id,
        'save_path': save_path,
        **vars(args)
    }
    dump_setting(setting, os.path.join(save_path, 'setting.yaml'))
    home_player_tag = target.word_to_id.get(target.home_player_tag)
    away_player_tag = target.word_to_id.get(target.away_player_tag)
    home_team_tag = target.word_to_id.get(target.home_team_tag)
    away_team_tag = target.word_to_id.get(target.away_team_tag)
    print('vocab size: {}'.format(len(target.word_to_id)))

    if args.net == 'plain':
        model = MLPEncoder2AttentionDecoder(
            len(source.type_to_id), len(source.player_to_id), len(source.team_to_id),
            len(source.detail_to_id), source.details_dimention, args.src_embed,
            args.event_size, len(target.word_to_id), args.trg_embed, args.hidden,
            start_id, end_id, class_weight, args.mlp_layers, args.max_length,
            args.dropout, IGNORE_LABEL, reverse_decode=reverse_decode)
    elif args.net == 'tmpl':
        model = MLPEncoder2AttentionDecoder(
            len(source.type_to_id), len(source.player_to_id), len(source.team_to_id),
            len(source.detail_to_id), source.details_dimention, args.src_embed,
            args.event_size, len(target.word_to_id), args.trg_embed, args.hidden,
            start_id, end_id, class_weight, args.mlp_layers, args.max_length,
            args.dropout, IGNORE_LABEL, source.id_to_player, home_player_tag,
            away_player_tag, source.id_to_team, home_team_tag, away_team_tag,
            target.player_to_id, target.players, reverse_decode=reverse_decode)
    elif args.net == 'gate':
        model = MLPEncoder2GatedAttentionDecoder(
            len(source.type_to_id), len(source.player_to_id), len(source.team_to_id),
            len(source.detail_to_id), source.details_dimention, args.src_embed,
            args.event_size, len(target.word_to_id), args.trg_embed, args.hidden,
            start_id, end_id, class_weight, args.mlp_layers, args.max_length,
            args.dropout, IGNORE_LABEL, reverse_decode=reverse_decode)
    elif args.net == 'gate-tmpl':
        model = MLPEncoder2GatedAttentionDecoder(
            len(source.type_to_id), len(source.player_to_id), len(source.team_to_id),
            len(source.detail_to_id), source.details_dimention, args.src_embed,
            args.event_size, len(target.word_to_id), args.trg_embed, args.hidden,
            start_id, end_id, class_weight, args.mlp_layers, args.max_length,
            args.dropout, IGNORE_LABEL, source.id_to_player, home_player_tag,
            away_player_tag, source.id_to_team, home_team_tag, away_team_tag,
            target.player_to_id, target.players, reverse_decode=reverse_decode)
    elif args.net == 'disc':
        model = DiscriminativeMLPEncoder2AttentionDecoder(
            len(source.type_to_id), len(source.player_to_id), len(source.team_to_id),
            len(source.detail_to_id), source.details_dimention, args.src_embed,
            args.event_size, len(target.word_to_id), len(target.content_word_to_id),
            args.trg_embed, args.hidden, start_id, end_id, class_weight,
            args.loss_weight, args.disc_loss, args.loss_func, args.mlp_layers,
            args.max_length, args.dropout, IGNORE_LABEL, reverse_decode=reverse_decode)
    elif args.net == 'disc-tmpl':
        model = DiscriminativeMLPEncoder2AttentionDecoder(
            len(source.type_to_id), len(source.player_to_id), len(source.team_to_id),
            len(source.detail_to_id), source.details_dimention, args.src_embed,
            args.event_size, len(target.word_to_id), len(target.content_word_to_id),
            args.trg_embed, args.hidden, start_id, end_id, class_weight,
            args.loss_weight, args.disc_loss, args.loss_func, args.mlp_layers,
            args.max_length, args.dropout, IGNORE_LABEL, source.id_to_player,
            home_player_tag, away_player_tag, source.id_to_team, home_team_tag,
            away_team_tag, target.player_to_id, target.players,
            reverse_decode=reverse_decode)
    elif args.net == 'gate-disc':
        model = DiscriminativeMLPEncoder2GatedAttentionDecoder(
            len(source.type_to_id), len(source.player_to_id), len(source.team_to_id),
            len(source.detail_to_id), source.details_dimention, args.src_embed,
            args.event_size, len(target.word_to_id), len(target.content_word_to_id),
            args.trg_embed, args.hidden, start_id, end_id, class_weight,
            args.loss_weight, args.disc_loss, args.loss_func, args.mlp_layers,
            args.max_length, args.dropout, IGNORE_LABEL, reverse_decode=reverse_decode)
    elif args.net == 'gate-disc-tmpl':
        model = DiscriminativeMLPEncoder2GatedAttentionDecoder(
            len(source.type_to_id), len(source.player_to_id), len(source.team_to_id),
            len(source.detail_to_id), source.details_dimention, args.src_embed,
            args.event_size, len(target.word_to_id), len(target.content_word_to_id),
            args.trg_embed, args.hidden, start_id, end_id, class_weight,
            args.loss_weight, args.disc_loss, args.loss_func, args.mlp_layers,
            args.max_length, args.dropout, IGNORE_LABEL, source.id_to_player,
            home_player_tag, away_player_tag, source.id_to_team, home_team_tag,
            away_team_tag, target.player_to_id, target.players,
            reverse_decode=reverse_decode)
    elif args.net == 'conv-gate-disc-tmpl':
        model = DiscriminativeGLUEncoder2GatedAttentionDecoder(
            len(source.type_to_id), len(source.player_to_id), len(source.team_to_id),
            len(source.detail_to_id), source.details_dimention, args.src_embed,
            args.event_size, len(target.word_to_id), len(target.content_word_to_id),
            args.trg_embed, args.hidden, start_id, end_id, class_weight,
            args.loss_weight, args.disc_loss, args.loss_func, args.mlp_layers,
            args.max_length, args.dropout, IGNORE_LABEL, source.id_to_player,
            home_player_tag, away_player_tag, source.id_to_team, home_team_tag,
            away_team_tag, target.player_to_id, target.players,
            reverse_decode=reverse_decode)

    model.keyword_ids = [
        target.word_to_id['save'], target.word_to_id['block'],
        target.word_to_id['chance'], target.word_to_id['shot'],
        target.word_to_id['clearance'], target.word_to_id['kick'],
        target.word_to_id['ball'], target.word_to_id['blocked'],
        target.word_to_id['denied']
    ]
    model.id_to_word = target.id_to_word
    if args.numbering:
        model.player_id = target.player_id
        model.team_id = target.team_id

    if args.gpu is not None:
        model.use_gpu(args.gpu)
    opt = optimizers.Adam(args.lr)
    opt.setup(model)
    if args.clipping > 0:
        opt.add_hook(GradientClipping(args.clipping))
    if args.decay > 0:
        opt.add_hook(WeightDecay(args.decay))

    N = len(train.source)
    batch_size = args.batch
    order_provider = OrderProvider(Sampling.get_random_order(N))
    src_train_iter = SequentialIterator(train.source, batch_size, order_provider,
                                        args.event_size, source.fillvalue, gpu=args.gpu)
    if 'disc' in args.net:
        trg_train_iter = TextAndLabelIterator(train.target, batch_size, order_provider,
                                              args.sentence_size, IGNORE_LABEL, gpu=args.gpu)
    else:
        trg_train_iter = SequentialIterator(train.target, batch_size, order_provider,
                                            args.sentence_size, IGNORE_LABEL, gpu=args.gpu)
    src_dev_iter = SequentialIterator(dev.source, batch_size, None, args.event_size,
                                      source.fillvalue, gpu=args.gpu)
    trg_dev_iter = Iterator(dev.target, batch_size,
                            wrapper=EndTokenIdRemoval(end_id), gpu=None)
    src_test_iter = SequentialIterator(test.source, batch_size, None, args.event_size,
                                       source.fillvalue, gpu=args.gpu)
    src_test20_iter = SequentialIterator(test20.source, batch_size, None, args.event_size,
                                         source.fillvalue, gpu=args.gpu)
    src_test15_iter = SequentialIterator(test15.source, batch_size, None, args.event_size,
                                         source.fillvalue, gpu=args.gpu)
    src_test10_iter = SequentialIterator(test10.source, batch_size, None, args.event_size,
                                         source.fillvalue, gpu=args.gpu)
    src_train2_iter = SequentialIterator(train2.source, batch_size, None, args.event_size,
                                         source.fillvalue, gpu=args.gpu)
    trg_train2_iter = Iterator(train2.target, batch_size,
                               wrapper=EndTokenIdRemoval(end_id), gpu=None)
    trg_test_iter = Iterator(test.target, batch_size,
                             wrapper=EndTokenIdRemoval(end_id), gpu=None)
    trg_test20_iter = Iterator(test20.target, batch_size,
                               wrapper=EndTokenIdRemoval(end_id), gpu=None)
    trg_test15_iter = Iterator(test15.target, batch_size,
                               wrapper=EndTokenIdRemoval(end_id), gpu=None)
    trg_test10_iter = Iterator(test10.target, batch_size,
                               wrapper=EndTokenIdRemoval(end_id), gpu=None)

    if 'disc' in args.net:
        trainer = Seq2SeqWithLabelTrainer(
            model, opt, src_train_iter, trg_train_iter, src_dev_iter, trg_dev_iter,
            order_provider, evaluate_bleu_and_accuracy, args.epoch, save_path,
            args.eval_step, src_train2_iter, trg_train2_iter)
    else:
        trainer = Seq2SeqTrainer(
            model, opt, src_train_iter, trg_train_iter, src_dev_iter, trg_dev_iter,
            order_provider, evaluate_bleu, args.epoch, save_path,
            args.eval_step, src_train2_iter, trg_train2_iter)

    trainer.run()

    # load best model
    model.load_model(os.path.join(save_path, 'best.model'))
    if 'disc' in args.net:
        bleu_score_dev, _, _ = evaluate_bleu_and_accuracy(model, src_dev_iter, trg_dev_iter)
        bleu_score, _, _ = evaluate_bleu_and_accuracy(model, src_test_iter, trg_test_iter)
        bleu_score20, _, hypotheses = evaluate_bleu_and_accuracy(
            model, src_test20_iter, trg_test20_iter)
        bleu_score15, _, _ = evaluate_bleu_and_accuracy(model, src_test15_iter, trg_test15_iter)
        bleu_score10, _, _ = evaluate_bleu_and_accuracy(model, src_test10_iter, trg_test10_iter)
    else:
        bleu_score_dev, _ = evaluate_bleu(model, src_dev_iter, trg_dev_iter)
        bleu_score, _ = evaluate_bleu(model, src_test_iter, trg_test_iter)
        bleu_score20, hypotheses = evaluate_bleu(model, src_test20_iter, trg_test20_iter)
        bleu_score15, _ = evaluate_bleu(model, src_test15_iter, trg_test15_iter)
        bleu_score10, _ = evaluate_bleu(model, src_test10_iter, trg_test10_iter)
    TextFile(os.path.join(save_path, 'hypotheses.txt'),
             [' '.join(ys) for ys in trainer.hypotheses]).save()
    print('dev score: {}'.format(bleu_score_dev))
    print('test score: {}'.format(bleu_score))
    print('test score20: {}'.format(bleu_score20))
    print('test score15: {}'.format(bleu_score15))
    print('test score10: {}'.format(bleu_score10))

    # saving fields
    pickle_dump(os.path.join(save_path, 'source.pkl'), source)
    pickle_dump(os.path.join(save_path, 'target.pkl'), target)
    pickle_dump(os.path.join(save_path, 'target_test.pkl'), target_test)
def main():
    model_cfgs = {
        'detnas_small_coco': {
            'class': DetNASSmallCOCO,
            'score_layer_name': 'fc',
            'kwargs': {
                # 'n_class': 1000
            }
        },
    }

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to root of the train dataset')
    parser.add_argument('val', help='Path to root of the validation dataset')
    parser.add_argument('--trial', action='store_true')
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--model', '-m', choices=model_cfgs.keys(),
                        default='detnas_small_coco', help='Convnet models')
    parser.add_argument('--loaderjob', type=int, default=4)
    parser.add_argument('--batchsize', type=int, help='Batch size for each worker')
    parser.add_argument('--lr', type=float)
    parser.add_argument('--momentum', type=float)
    parser.add_argument('--weight_decay', type=float)
    parser.add_argument('--out', type=str, default='result')
    parser.add_argument('--epoch', type=int)
    args = parser.parse_args()

    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    label_names = directory_parsing_label_names(args.train)

    model_cfg = model_cfgs[args.model]
    extractor = model_cfg['class'](n_class=len(label_names), **model_cfg['kwargs'])
    extractor.pick = model_cfg['score_layer_name']
    model = Classifier(extractor)

    train_data = DirectoryParsingLabelDataset(args.train)
    val_data = DirectoryParsingLabelDataset(args.val)
    train_data = TransformDataset(train_data, TrainTransform(extractor.mean))
    val_data = TransformDataset(val_data, ValTransform(extractor.mean))
    print('finished loading dataset')

    train_indices = np.arange(len(train_data) // (100 if args.trial else 1))
    val_indices = np.arange(len(val_data))
    """
    train_data = train_data.slice[train_indices]
    val_data = val_data.slice[val_indices]
    """

    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, args.batchsize, n_processes=args.loaderjob)
    val_iter = iterators.MultiprocessIterator(
        val_data, args.batchsize, repeat=False, shuffle=False,
        n_processes=args.loaderjob)

    optimizer = CorrectedMomentumSGD(lr=args.lr, momentum=args.momentum)
    optimizer.setup(model)
    for param in model.params():
        if param.name not in ('beta', 'gamma'):
            param.update_rule.add_hook(WeightDecay(args.weight_decay))

    if args.gpu != -1:
        model.to_gpu(args.gpu)

    updater = chainer.training.StandardUpdater(train_iter, optimizer, device=args.gpu)

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(LinearShift('lr', (args.lr, 0.0),
                               (0, len(train_indices) / args.batchsize)))

    evaluator = extensions.Evaluator(val_iter, model)
    trainer.extend(evaluator, trigger=(1, 'epoch'))

    log_interval = 0.1, 'epoch'
    print_interval = 0.1, 'epoch'

    trainer.extend(chainer.training.extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.snapshot_object(
        extractor, 'snapshot_model_{.updater.epoch}.npz'),
        trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'iteration', 'epoch', 'elapsed_time', 'lr',
        'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy'
    ]), trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
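# Note on the pattern above: a hook registered via param.update_rule.add_hook applies to
# that single parameter, which is what makes it possible to exempt the BatchNormalization
# parameters (named 'beta' and 'gamma') from L2 decay. The global form
#
#     optimizer.add_hook(WeightDecay(args.weight_decay))
#
# would instead decay every parameter of the model. Both use the same WeightDecay hook;
# only the attachment point differs.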
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', type=int, default=8)
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--iteration', type=int, default=10000)
    parser.add_argument('--interval', type=int, default=1000)
    parser.add_argument('--resume')
    args = parser.parse_args()

    label_names = pose_bbox_label_names
    BboxDataset = PoseBboxDataset

    model = SSD300(n_fg_class=len(label_names), pretrained_model='./models/imagenet.npz')
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    train = TransformDataset(BboxDataset(split='trainval'),
                             Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)

    test = BboxDataset(split='test')
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # initial lr is set to 2e-5 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.iteration, 'iteration'), args.out)
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=2e-5),
                   trigger=triggers.ManualScheduleTrigger(
                       [int(args.iteration * 0.8), int(args.iteration * 0.9)], 'iteration'))

    trainer.extend(DetectionEvaluator(test_iter, model, use_07_metric=True,
                                      label_names=label_names),
                   trigger=(args.interval, 'iteration'))

    log_interval = 10, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'lr',
        'main/loss', 'main/loss/loc', 'main/loss/conf',
        'validation/main/map'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.extend(extensions.snapshot_object(model, 'pose_iter_{.updater.iteration}.npz'),
                   trigger=(args.iteration // 2, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
    args.label_dim_ft, 1, 1, 0, init_weights=None, pool=None, nobias=False))

print 'Setup optimizer'
opt = SetupOptimizer(model)

# Use lower learning rate for pretrained parts
for name, param in opt.target.namedparams():
    if name.startswith('/predictor/'):
        param.update_rule.hyperparam.lr = args.optimizer['lr_pretrained']
opt.add_hook(WeightDecay(0.0005))
# opt.add_hook(GradientClipping(2.0))

# Resume training from a checkpoint
if args.checkpoint > 0:
    print 'Resume training from checkpoint'
    # Load model weights
    model = ResumeFromCheckpoint(
        '%s/checkpoints/%s_%s_iter_%d.chainermodel' % (
            args.project_folder, args.dataset, args.train_set[6:], args.checkpoint),
        model)
    # Load optimizer status
    serializers.load_npz(
        '%s/checkpoints/%s_%s_iter_%d.chaineropt' % (
            args.project_folder, args.dataset, args.train_set[6:], args.checkpoint),
        opt)
def train(args):
    model = FastBiaffineLSTMParser(args.model, args.word_emb_size, args.afix_emb_size,
                                   args.nlayers, args.hidden_dim, args.dep_dim,
                                   args.dropout_ratio)
    with open(args.model + "/params", "w") as f:
        log(args, f)

    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    if args.pretrained:
        print('Load pretrained word embeddings from', args.pretrained)
        model.load_pretrained_embeddings(args.pretrained)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    if args.tritrain is not None:
        train = LSTMParserTriTrainDataset(args.model, args.train,
                                          args.tritrain, args.tri_weight)
    else:
        train = LSTMParserDataset(args.model, args.train)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    val = LSTMParserDataset(args.model, args.val)
    val_iter = chainer.iterators.SerialIterator(val, args.batchsize,
                                                repeat=False, shuffle=False)

    optimizer = chainer.optimizers.Adam(beta2=0.9)
    # optimizer = chainer.optimizers.MomentumSGD(momentum=0.7)
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(2e-6))
    optimizer.add_hook(GradientClipping(15.))
    updater = MyUpdater(train_iter, optimizer, device=args.gpu, converter=converter)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.model)

    val_interval = 1000, 'iteration'
    log_interval = 200, 'iteration'

    eval_model = model.copy()
    eval_model.train = False

    trainer.extend(extensions.ExponentialShift("eps", .75, init=2e-3, optimizer=optimizer),
                   trigger=(2500, 'iteration'))
    trainer.extend(MyEvaluator(val_iter, eval_model, converter, device=args.gpu),
                   trigger=val_interval)
    trainer.extend(extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'),
                   trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(observation_key="eps"), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration',
        'main/tagging_accuracy', 'main/tagging_loss',
        'main/parsing_accuracy', 'main/parsing_loss',
        'validation/main/tagging_accuracy', 'validation/main/parsing_accuracy',
        'eps'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=('ssd300', 'ssd512'), default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_detection_label_names),
                       pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_detection_label_names),
                       pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    train = TransformDataset(
        ConcatenatedDataset(
            VOCDetectionDataset(year='2007', split='trainval'),
            VOCDetectionDataset(year='2012', split='trainval')),
        Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)

    test = VOCDetectionDataset(year='2007', split='test',
                               use_difficult=True, return_difficult=True)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (120000, 'iteration'), args.out)
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger([80000, 100000], 'iteration'))

    trainer.extend(DetectionVOCEvaluator(test_iter, model, use_07_metric=True,
                                         label_names=voc_detection_label_names),
                   trigger=(10000, 'iteration'))

    log_interval = 10, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'lr',
        'main/loss', 'main/loss/loc', 'main/loss/conf',
        'validation/main/map'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration'))
    trainer.extend(extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'),
                   trigger=(120000, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
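# Why the bias/weight split in the SSD examples above: following the original Caffe SSD
# training recipe, bias terms ('b') get an effectively doubled learning rate via
# GradientScaling(2) and are excluded from weight decay, while every other parameter is
# trained with L2 decay of 5e-4. GradientScaling here is ChainerCV's hook
# (chainercv.links.model.ssd.GradientScaling), not a core Chainer class.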
def fit(self, X, y, x_test=None, data_name=None, n_epoch=1000, batch_size=20,
        freq_print_loss=10, freq_plot=50, n_samples=100):
    """Start tuning the model parameters.

    :param np.ndarray X:
    :param np.ndarray y:
    :param np.ndarray | None x_test:
    :param str | None data_name:
    :param int n_epoch:
    :param int batch_size:
    :param int freq_print_loss:
    :param int freq_plot:
    :param int n_samples: number of posterior samples drawn when plotting the posterior.
    :return: self
    """
    conditions = self.conditions
    output_dir = "data/{data_name}/{conditions}".format(**locals())

    # create the output directory for the plots
    if os.path.exists(output_dir) is False:
        os.makedirs(output_dir)

    X, y = self.preprocess(X, y)
    if x_test is not None:
        x_test = self.x_transformer.transform(x_test)
    N = X.shape[0]

    # convert to chainer Variable
    X = Variable(preprocess_array_format(X))
    y = Variable(preprocess_array_format(y))
    if x_test is not None:
        x_test = Variable(preprocess_array_format(x_test))

    self.optimizer.setup(self.model)
    self.optimizer.add_hook(WeightDecay(self.weight_decay))

    list_loss = []
    for e in range(1, n_epoch + 1):
        perm = np.random.permutation(N)
        for i in range(0, N, batch_size):
            idx = perm[i: i + batch_size]
            _x = X[idx]
            _y = y[idx]
            self.model.zerograds()
            loss = F.mean_squared_error(self.model(_x, apply_input=self.apply_input), _y)
            loss.backward()
            self.optimizer.update()
        l = F.mean_squared_error(self.model(X, False, False), y).data

        if e % freq_print_loss == 0:
            print("epoch: {e}\tloss:{l}".format(**locals()))

        if e % freq_plot == 0:
            fig, ax = self.plot_posterior(x_test, X.data, y.data, n_samples=n_samples)
            ax.set_title("epoch:{0:04d}".format(e))
            fig.tight_layout()
            file_path = os.path.join(output_dir, "epoch={e:04d}.png".format(**locals()))
            fig.savefig(file_path, dpi=150)
            plt.close("all")

        list_loss.append([e, l])

    save_logloss(list_loss, self.model.__str__())
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=('ssd300', 'ssd512'), default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--labelnum', type=int, default=50)
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    parser.add_argument('--image_label', '-il',
                        help='Path to training image-label list file')
    parser.add_argument('--bbox', help='Path to training bbox list file')
    parser.add_argument('--image_label_test', '-ilt',
                        help='Path to training image-label list file')
    parser.add_argument('--bbox_test', help='Path to training bbox list file')
    parser.add_argument('--image_root', '-TR', default='.',
                        help='Root directory path of image files')
    args = parser.parse_args()

    comm = chainermn.create_communicator('naive')
    if comm.mpi_comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(MPI.COMM_WORLD.Get_size()))

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=args.labelnum, pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=args.labelnum, pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    from test_datasets import DeepFashionBboxDataset
    if comm.rank == 0:
        train = DeepFashionBboxDataset(args.bbox, args.image_label, args.image_root)
        test = DeepFashionBboxDataset(args.bbox_test, args.image_label_test, args.image_root)
        train = TransformDataset(train, Transform(model.coder, model.insize, model.mean))
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm, shuffle=True)

    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (120000, 'iteration'), args.out)

    checkpoint_interval = (1000, 'iteration')
    checkpointer = chainermn.create_multi_node_checkpointer(
        name='imagenet-example', comm=comm)
    checkpointer.maybe_load(trainer, optimizer)
    trainer.extend(checkpointer, trigger=checkpoint_interval)

    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger([80000, 100000], 'iteration'))

    evaluator = DetectionVOCEvaluator(test_iter, model, use_07_metric=True,
                                      label_names=voc_bbox_label_names)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=(10000, 'iteration'))

    if comm.rank == 0:
        log_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'lr',
            'main/loss', 'main/loss/loc', 'main/loss/conf',
            'validation/main/map'
        ]), trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))
        trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration'))
        trainer.extend(extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'),
                       trigger=(120000, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
def train(train_data, val_data, label_names, iteration, lr, step_points,
          batchsize, gpu, out, val_iteration, log_iteration, loaderjob, resume):
    model = SSD300(n_fg_class=len(label_names), pretrained_model='imagenet')
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)  # alpha and k?
    if gpu >= 0:
        chainer.cuda.get_device_from_id(gpu).use()
        model.to_gpu()

    train_data = TransformDataset(train_data,
                                  Transform(model.coder, model.insize, model.mean))
    if loaderjob <= 0:
        train_iter = chainer.iterators.SerialIterator(train_data, batchsize)
    else:
        train_iter = chainer.iterators.MultiprocessIterator(
            train_data, batchsize)  # , n_processes=min((loaderjob, batchsize)))

    val_iter = chainer.iterators.SerialIterator(
        val_data, batchsize, repeat=False, shuffle=False)  # think about repeat

    # initial lr is set to 1e-4 (default run_train) by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.StandardUpdater(train_iter, optimizer, device=gpu)
    trainer = training.Trainer(updater, (iteration, 'iteration'), out)
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=lr),
        trigger=triggers.ManualScheduleTrigger(step_points, 'iteration'))

    val_interval = (val_iteration, 'iteration')
    trainer.extend(
        DetectionVOCEvaluator(val_iter, model, use_07_metric=True, label_names=label_names),
        trigger=val_interval)

    ###
    # Logging
    log_interval = log_iteration, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'lr',
         'main/loss', 'main/loss/loc', 'main/loss/conf',
         'validation/main/map']),
        trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(extensions.snapshot(), trigger=val_interval)
    trainer.extend(
        extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'),
        trigger=val_interval)
    ###

    if resume:
        serializers.load_npz(resume, trainer)

    trainer.run()
def main():
    model_cfgs = {
        'resnet50': {'class': ResNet50, 'score_layer_name': 'fc6',
                     'kwargs': {'arch': 'fb'}},
        'resnet101': {'class': ResNet101, 'score_layer_name': 'fc6',
                      'kwargs': {'arch': 'fb'}},
        'resnet152': {'class': ResNet152, 'score_layer_name': 'fc6',
                      'kwargs': {'arch': 'fb'}}
    }
    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to root of the train dataset')
    parser.add_argument('val', help='Path to root of the validation dataset')
    parser.add_argument('--export', type=str, default=None,
                        help='Export the model to ONNX')
    parser.add_argument('--compile', type=str, default=None,
                        help='Compile the model')
    parser.add_argument('--computation_order', type=str, default=None,
                        help='Computation order in backpropagation')
    parser.add_argument('--model', '-m', choices=model_cfgs.keys(),
                        default='resnet50', help='Convnet models')
    parser.add_argument('--communicator', type=str, default='pure_nccl',
                        help='Type of communicator')
    parser.add_argument('--loaderjob', type=int, default=4)
    parser.add_argument('--batchsize', type=int, default=32,
                        help='Batch size for each worker')
    parser.add_argument('--lr', type=float)
    parser.add_argument('--momentum', type=float, default=0.9)
    parser.add_argument('--weight-decay', type=float, default=0.0001)
    parser.add_argument('--out', type=str, default='result')
    parser.add_argument('--epoch', type=int, default=90)
    parser.add_argument('--iterations', '-I', type=int, default=None,
                        help='Number of iterations to train')
    parser.add_argument('--no_use_fixed_batch_dataset',
                        dest='use_fixed_batch_dataset', action='store_false',
                        help='Disable the use of FixedBatchDataset')
    parser.add_argument('--compiler-log', action='store_true',
                        help='Enables compile-time logging')
    parser.add_argument('--trace', action='store_true',
                        help='Enables runtime tracing')
    parser.add_argument('--verbose', action='store_true',
                        help='Enables runtime verbose log')
    parser.add_argument('--skip_runtime_type_check', action='store_true',
                        help='Skip runtime type check')
    parser.add_argument('--dump_memory_usage', type=int, default=0,
                        help='Dump memory usage (0-2)')
    parser.add_argument('--quiet_period', type=int, default=0,
                        help='Quiet period after runtime report')
    parser.add_argument('--overwrite_batchsize', action='store_true',
                        help='Overwrite batch size')
    args = parser.parse_args()

    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    comm = chainermn.create_communicator(args.communicator)
    device = comm.intra_rank

    if args.lr is not None:
        lr = args.lr
    else:
        lr = 0.1 * (args.batchsize * comm.size) / 256
        if comm.rank == 0:
            print('lr={}: lr is selected based on the linear '
                  'scaling rule'.format(lr))

    label_names = directory_parsing_label_names(args.train)

    model_cfg = model_cfgs[args.model]
    extractor = model_cfg['class'](n_class=len(label_names), **model_cfg['kwargs'])
    extractor.pick = model_cfg['score_layer_name']

    # Following https://arxiv.org/pdf/1706.02677.pdf,
    # the gamma of the last BN of each resblock is initialized by zeros.
    for l in extractor.links():
        if isinstance(l, Bottleneck):
            l.conv3.bn.gamma.data[:] = 0

    if args.export is not None:
        chainer_compiler.use_unified_memory_allocator()
        extractor.to_device(device)
        x = extractor.xp.zeros((args.batchsize, 3, 224, 224)).astype('f')
        chainer_compiler.export(extractor, [x], args.export)
        return

    if args.compile is not None:
        print('run compiled model')
        chainer_compiler.use_chainerx_shared_allocator()
        extractor.to_device(device)
        # init params
        with chainer.using_config('enable_backprop', False),\
                chainer.using_config('train', False):
            x = extractor.xp.zeros((1, 3, 224, 224)).astype('f')
            extractor(x)

        compiler_kwargs = {}
        if args.compiler_log:
            compiler_kwargs['compiler_log'] = True
        runtime_kwargs = {}
        if args.trace:
            runtime_kwargs['trace'] = True
        if args.verbose:
            runtime_kwargs['verbose'] = True
        if args.skip_runtime_type_check:
            runtime_kwargs['check_types'] = False
        if args.dump_memory_usage >= 1:
            runtime_kwargs['dump_memory_usage'] = args.dump_memory_usage
            free, total = cupy.cuda.runtime.memGetInfo()
            used = total - free
            runtime_kwargs['base_memory_usage'] = used

        onnx_filename = args.compile
        if args.overwrite_batchsize:
            new_onnx_filename = ('/tmp/overwrite_batchsize_' +
                                 os.path.basename(onnx_filename))
            new_input_types = [
                input_rewriter.Type(shape=(args.batchsize, 3, 224, 224))
            ]
            input_rewriter.rewrite_onnx_file(onnx_filename, new_onnx_filename,
                                             new_input_types)
            onnx_filename = new_onnx_filename

        extractor_cc = chainer_compiler.compile_onnx(
            extractor,
            onnx_filename,
            'onnx_chainer',
            computation_order=args.computation_order,
            compiler_kwargs=compiler_kwargs,
            runtime_kwargs=runtime_kwargs,
            quiet_period=args.quiet_period)
        model = Classifier(extractor_cc)
    else:
        print('run vanilla chainer model')
        model = Classifier(extractor)

    train_data = DirectoryParsingLabelDataset(args.train)
    val_data = DirectoryParsingLabelDataset(args.val)
    train_data = TransformDataset(
        train_data, ('img', 'label'), TrainTransform(extractor.mean))
    val_data = TransformDataset(
        val_data, ('img', 'label'), ValTransform(extractor.mean))
    print('finished loading dataset')

    if comm.rank == 0:
        train_indices = np.arange(len(train_data))
        val_indices = np.arange(len(val_data))
    else:
        train_indices = None
        val_indices = None

    train_indices = chainermn.scatter_dataset(train_indices, comm, shuffle=True)
    val_indices = chainermn.scatter_dataset(val_indices, comm, shuffle=True)
    train_data = train_data.slice[train_indices]
    val_data = val_data.slice[val_indices]

    if args.use_fixed_batch_dataset:
        train_data = FixedBatchDataset(train_data, args.batchsize)
        val_data = FixedBatchDataset(val_data, args.batchsize)

    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, args.batchsize, n_processes=args.loaderjob)
    val_iter = iterators.MultiprocessIterator(
        val_data, args.batchsize, repeat=False, shuffle=False,
        n_processes=args.loaderjob)

    optimizer = chainermn.create_multi_node_optimizer(
        CorrectedMomentumSGD(lr=lr, momentum=args.momentum), comm)
    optimizer.setup(model)
    for param in model.params():
        if param.name not in ('beta', 'gamma'):
            param.update_rule.add_hook(WeightDecay(args.weight_decay))

    if device >= 0:
        chainer.cuda.get_device(device).use()
        model.to_gpu()

    updater = chainer.training.StandardUpdater(train_iter, optimizer, device=device)
    if args.iterations:
        stop_trigger = (args.iterations, 'iteration')
    else:
        stop_trigger = (args.epoch, 'epoch')
    trainer = training.Trainer(updater, stop_trigger, out=args.out)

    @make_shift('lr')
    def warmup_and_exponential_shift(trainer):
        epoch = trainer.updater.epoch_detail
        warmup_epoch = 5
        if epoch < warmup_epoch:
            if lr > 0.1:
                warmup_rate = 0.1 / lr
                rate = warmup_rate + (1 - warmup_rate) * epoch / warmup_epoch
            else:
                rate = 1
        elif epoch < 30:
            rate = 1
        elif epoch < 60:
            rate = 0.1
        elif epoch < 80:
            rate = 0.01
        else:
            rate = 0.001
        return rate * lr

    trainer.extend(warmup_and_exponential_shift)
    evaluator = chainermn.create_multi_node_evaluator(
        extensions.Evaluator(val_iter, model, device=device), comm)
    trainer.extend(evaluator, trigger=(1, 'epoch'))

    log_interval = 0.1, 'epoch'
    print_interval = 0.1, 'epoch'

    if comm.rank == 0:
        trainer.extend(chainer.training.extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.snapshot_object(
            extractor, 'snapshot_model_{.updater.epoch}.npz'),
            trigger=(args.epoch, 'epoch'))
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.PrintReport(
            ['iteration', 'epoch', 'elapsed_time', 'lr',
             'main/loss', 'validation/main/loss',
             'main/accuracy', 'validation/main/accuracy']
        ), trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
print 'Prepare Dataset'
datasets = DataChef.PrepData(args)

print 'Initialize Model'
predictor = Models.InceptionV3(args)
classifiers = [Models.Classifier(label_dim) for label_dim in args.label_dim]
model = Models.InceptionV3Classifier(predictor, classifiers, args)

print 'Setup optimizer'
opt = SetupOptimizer(model)

# Use lower learning rate for pretrained parts
for name, param in opt.target.namedparams():
    if name.startswith('/predictor/'):
        param.update_rule.hyperparam.lr = args.optimizer['lr_pretrained']
opt.add_hook(WeightDecay(args.optimizer['weightdecay']))
opt.add_hook(GradientClipping(args.optimizer['gradientclipping']))

# Resume training from a checkpoint
if args.checkpoint > 0:
    print 'Resume training from checkpoint'
    # Load model weights
    ResumeFromCheckpoint(
        '%s/checkpoints/%s_iter_%d_%s.chainermodel' % (
            args.project_folder, args.dataset, args.checkpoint, args.suffix),
        model)
    # Load optimizer status
    serializers.load_npz(
        '%s/checkpoints/%s_iter_%d_%s.chaineropt' % (
            args.project_folder, args.dataset, args.checkpoint, args.suffix),
        opt)

# Adjust the learning rate
def main(config):
    opts = config()
    comm = chainermn.create_communicator(opts.communicator)
    device = comm.intra_rank
    backborn_cfg = opts.backborn_cfg
    df = pd.read_csv(opts.path_data + opts.train_df).sample(frac=1)

    ################### pseudo labeling #########################
    if opts.pseudo_labeling_path is not None:
        test_df = pd.read_csv(opts.path_data + opts.test_df)
        labels = np.load(opts.pseudo_labeling_path, allow_pickle=False)
        labels = np.concatenate((labels, labels))
        count = 0
        valid_array = []
        valid_sirna = []
        for i, label in enumerate(labels):
            if label.max() > 0.0013:
                count = count + 1
                valid_array.append(i)
                valid_sirna.append(label.argmax())
        print(count)
        pseudo_df = test_df.iloc[valid_array, :]
        pseudo_df["sirna"] = valid_sirna
        df = pd.concat([df, pseudo_df]).sample(frac=1)
    ################### pseudo labeling #########################

    for i, (train_df, valid_df) in enumerate(
            stratified_groups_kfold(df, target=opts.fold_target, n_splits=opts.fold)):
        if comm.rank == 0:
            train_df.to_csv(
                opts.path_data + 'train' + '_fold' + str(i) + '.csv',
                columns=['id_code', 'experiment', 'plate', 'well', 'sirna',
                         'filename', 'cell', 'site'])
            valid_df.to_csv(
                opts.path_data + 'valid' + '_fold' + str(i) + '.csv',
                columns=['id_code', 'experiment', 'plate', 'well', 'sirna',
                         'filename', 'cell', 'site'])
            print("Save a csvfile of fold_" + str(i))

        dataset = opts.dataset
        train_dataset = dataset(train_df, opts.path_data)
        val_dataset = dataset(valid_df, opts.path_data)

        backborn = chcv2_get_model(backborn_cfg['name'],
                                   pretrained=backborn_cfg['pretrain'],
                                   in_size=opts.input_shape)[backborn_cfg['layer']]
        model = opts.model(backborn=backborn).copy(mode='init')
        if device >= 0:
            chainer.cuda.get_device(device).use()
            model.to_gpu()

        mean = opts.mean
        train_data = TransformDataset(train_dataset, opts.train_transform)
        val_data = TransformDataset(val_dataset, opts.valid_trainsform)

        if comm.rank == 0:
            train_indices = train_data
            val_indices = val_data
        else:
            train_indices = None
            val_indices = None

        train_data = chainermn.scatter_dataset(train_indices, comm, shuffle=True)
        val_data = chainermn.scatter_dataset(val_indices, comm, shuffle=False)
        train_iter = chainer.iterators.MultiprocessIterator(
            train_data, opts.batchsize, shuffle=True, n_processes=opts.loaderjob)
        val_iter = chainer.iterators.MultiprocessIterator(
            val_data, opts.batchsize, repeat=False, shuffle=False,
            n_processes=opts.loaderjob)
        print('finished loading dataset')

        if device >= 0:
            chainer.cuda.get_device(device).use()
            model.to_gpu()

        if opts.optimizer == "CorrectedMomentumSGD":
            optimizer = chainermn.create_multi_node_optimizer(
                CorrectedMomentumSGD(lr=opts.lr), comm)
        elif opts.optimizer == "NesterovAG":
            optimizer = chainermn.create_multi_node_optimizer(
                NesterovAG(lr=opts.lr), comm)
        else:
            optimizer = chainermn.create_multi_node_optimizer(
                Adam(alpha=opts.alpha, weight_decay_rate=opts.weight_decay,
                     adabound=True, final_lr=0.5), comm)
        optimizer.setup(model)

        if opts.optimizer == "CorrectedMomentumSGD":
            for param in model.params():
                if param.name not in ('beta', 'gamma'):
                    param.update_rule.add_hook(WeightDecay(opts.weight_decay))

        if opts.fc_lossfun == 'softmax_cross_entropy':
            fc_lossfun = F.softmax_cross_entropy
        elif opts.fc_lossfun == 'focal_loss':
            if opts.ls:
                focal_loss = FocalLoss(label_smoothing=True)
            else:
                focal_loss = FocalLoss()
            fc_lossfun = focal_loss.loss
        elif opts.fc_lossfun == 'auto_focal_loss':
            if opts.ls:
                focal_loss = AutoFocalLoss(label_smoothing=True)
            else:
                focal_loss = AutoFocalLoss()
            fc_lossfun = focal_loss.loss
        elif opts.fc_lossfun == 'auto_focal_loss_bce':
            if opts.ls:
                focal_loss = AutoFocalLossBCE(label_smoothing=True)
            else:
                focal_loss = AutoFocalLoss()
            fc_lossfun = focal_loss.loss

        if opts.metric_lossfun == 'arcface':
            arcface = ArcFace()
            metric_lossfun = arcface.loss
        elif opts.metric_lossfun == 'adacos':
            adacos = AdaCos()
            metric_lossfun = adacos.loss

        updater = opts.updater(train_iter, optimizer, model, device=device,
                               max_epoch=opts.max_epoch, fix_sche=opts.fix_sche,
                               metric_lossfun=metric_lossfun, fc_lossfun=fc_lossfun,
                               metric_w=opts.metric_w, fc_w=opts.fc_w)
        evaluator = chainermn.create_multi_node_evaluator(
            opts.evaluator(val_iter, model, device=device,
                           max_epoch=opts.max_epoch, fix_sche=opts.fix_sche,
                           metric_lossfun=metric_lossfun, fc_lossfun=fc_lossfun,
                           metric_w=opts.metric_w, fc_w=opts.fc_w), comm)
        trainer = training.Trainer(updater, (opts.max_epoch, 'epoch'),
                                   out=opts.out + '_fold' + str(i))

        if opts.optimizer == "CorrectedMomentumSGD":
            trainer.extend(extensions.ExponentialShift('lr', opts.shift_lr),
                           trigger=ManualScheduleTrigger(opts.lr_points, 'epoch'))
        elif opts.optimizer == "NesterovAG":
            trainer.extend(extensions.ExponentialShift('lr', opts.shift_lr),
                           trigger=ManualScheduleTrigger(opts.lr_points, 'epoch'))
        else:
            trainer.extend(extensions.ExponentialShift('alpha', opts.shift_lr),
                           trigger=ManualScheduleTrigger(opts.lr_points, 'epoch'))

        trainer.extend(evaluator, trigger=(int(opts.max_epoch / 10), 'epoch'))
        # trainer.extend(evaluator, trigger=(int(1), 'epoch'))

        log_interval = 0.1, 'epoch'
        print_interval = 0.1, 'epoch'

        if comm.rank == 0:
            trainer.extend(chainer.training.extensions.observe_lr(), trigger=log_interval)
            trainer.extend(extensions.snapshot_object(
                model, 'snapshot_model' + '_{.updater.epoch}.npz'),
                trigger=(opts.max_epoch / 10, 'epoch'))
            trainer.extend(extensions.snapshot_object(
                model, 'snapshot_model_f1max.npz'),
                trigger=chainer.training.triggers.MaxValueTrigger(
                    'validation/main/accuracy',
                    trigger=(opts.max_epoch / 10, 'epoch')))
            trainer.extend(extensions.LogReport(trigger=log_interval))
            trainer.extend(extensions.PrintReport([
                'iteration', 'epoch', 'elapsed_time', 'lr',
                'main/loss', 'main/face_loss', 'main/ce_loss', 'main/accuracy',
                'validation/main/loss', 'validation/main/face_loss',
                'validation/main/ce_loss', 'validation/main/accuracy'
            ]), trigger=print_interval)
            trainer.extend(extensions.ProgressBar(update_interval=10))

        trainer.run()
def train(train_file, test_file, embed_file, n_epoch=20, batch_size=20, gpu=-1, save=None): # Load files Log.i('initialize preprocessor with %s' % embed_file) processor = Preprocessor(embed_file) reader = CorpusReader(processor) Log.i('load train dataset from %s' % str(train_file)) train_dataset = reader.load(train_file, train=True) Log.i('load test dataset from %s' % str(test_file)) test_dataset = reader.load(test_file, train=False) hparams = { 'dropout_ratio': 0.2, 'adagrad_lr': 0.2, 'weight_decay': 0.0001, } Log.v('') Log.v("initialize ...") Log.v('--------------------------------') Log.i('# Minibatch-size: %d' % batch_size) Log.i('# epoch: %d' % n_epoch) Log.i('# gpu: %d' % gpu) Log.i('# hyper-parameters: %s' % str(hparams)) Log.v('--------------------------------') Log.v('') # Set up a neural network cls = BLSTMCRF if _use_crf else BLSTM model = cls( embeddings=processor.embeddings, n_labels=4, dropout=hparams['dropout_ratio'], train=True, ) if gpu >= 0: cuda.get_device(gpu).use() model.to_gpu() eval_model = model.copy() eval_model.train = False # Setup an optimizer optimizer = optimizers.AdaGrad(lr=hparams['adagrad_lr']) optimizer.setup(model) optimizer.add_hook(WeightDecay(hparams['weight_decay'])) def _update(optimizer, loss): optimizer.target.zerograds() loss.backward() optimizer.update() def _process(dataset, model): size = len(dataset) batch_count = 0 loss = 0.0 accuracy = 0.0 p = ProgressBar(min_value=0, max_value=size, fd=sys.stderr).start() for i, (xs, ys) in enumerate( dataset.batch(batch_size, colwise=True, shuffle=model.train)): p.update((batch_size * i) + 1) batch_count += 1 batch_loss, batch_accuracy = model(xs, ys) loss += batch_loss.data accuracy += batch_accuracy if model.train: _update(optimizer, batch_loss) p.finish() Log.i("[%s] epoch %d - #samples: %d, loss: %f, accuracy: %f" % ('training' if model.train else 'evaluation', epoch + 1, size, loss / batch_count, accuracy / batch_count)) for epoch in range(n_epoch): _process(train_dataset, model) _process(test_dataset, eval_model) Log.v('-') if save is not None: Log.i("saving the model to %s ..." % save) serializers.save_npz(save, model)
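# Note on the `_update` helper above: `zerograds()` is the older Chainer API;
# since Chainer v2 the recommended call is `cleargrads()`, which resets
# gradients to None instead of zero-filling them. A drop-in sketch with the
# same effect for this training loop:
def _update_with_cleargrads(optimizer, loss):
    optimizer.target.cleargrads()  # preferred over zerograds() in Chainer >= 2
    loss.backward()
    optimizer.update()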
chainer.config.autotune = True """Creating Iterators for training. The Transform function is used on train_dataset.""" transformed_train_dataset = TransformDataset(train_dataset, Transform(model.coder, model.insize, model.mean)) train_iter = chainer.iterators.MultiprocessIterator(transformed_train_dataset, batchsize) valid_iter = chainer.iterators.SerialIterator(valid_dataset, batchsize, repeat=False, shuffle=False) optimizer = chainer.optimizers.MomentumSGD() optimizer.setup(train_chain) for param in train_chain.params(): if param.name == 'b': param.update_rule.add_hook(GradientScaling(2)) else: param.update_rule.add_hook(WeightDecay(0.0005)) updater = training.updaters.StandardUpdater( train_iter, optimizer, device=gpu_id) trainer = training.Trainer( updater, (training_epoch, 'epoch'), out) trainer.extend( extensions.ExponentialShift('lr', lr_decay_rate, init=initial_lr), trigger=triggers.ManualScheduleTrigger(lr_decay_timing, 'epoch')) trainer.extend( DetectionVOCEvaluator( valid_iter, model, use_07_metric=False,
def main():
    parser = ArgumentParser()
    parser.add_argument('train_data', help='train data')
    parser.add_argument('train_labels', help='train labels')
    parser.add_argument('--val-data', default=None, help='val data')
    parser.add_argument('--val-labels', default=None, help='val labels')
    parser.add_argument('-b', '--batch-size', type=int, default=5,
                        help='mini-batch size (default=5)')
    parser.add_argument('--beta2', type=float, default=0.999,
                        help='beta2 of Adam (default=0.999)')
    parser.add_argument('-g', '--gpu-id', type=int, default=-1,
                        help='GPU ID (default=-1, indicates CPU)')
    parser.add_argument('--ignore-labels', type=int, default=[], nargs='+',
                        help='labels to ignore (default=[])')
    parser.add_argument('-l', '--learning-rate', type=float, default=0.1,
                        help='learning rate (default=0.1)')
    parser.add_argument('--max-iter', type=int, default=160000,
                        help='train model up to max-iter (default=160000)')
    parser.add_argument('--mean-interval', type=int, default=1000,
                        help='calculate mean of train/loss (and validation loss) '
                             'every mean-interval iters (default=1000)')
    parser.add_argument('--model', default=None,
                        help='resume to train the model')
    parser.add_argument('--momentum', type=float, default=0.9,
                        help='momentum rate (default=0.9)')
    parser.add_argument('--n-classes', type=int, default=5,
                        help='number of classes (default=5)')
    parser.add_argument('--noise', default='no',
                        help='noise injection method. \'no\', \'patch\', '
                             'and \'permutation\' are available (default=\'no\')')
    parser.add_argument('--optim', default='nesterov',
                        help='optimization method. \'sgd\', \'nesterov\', '
                             'and \'adam\' are available (default=\'nesterov\')')
    parser.add_argument('-o', '--outdir', default='./',
                        help='trained models and optimizer states are stored '
                             'in outdir (default=\'./\')')
    parser.add_argument('--queue-maxsize', type=int, default=10,
                        help='maxsize of queues for training and validation '
                             '(default=10)')
    parser.add_argument('--save-interval', type=int, default=10000,
                        help='save model & optimizer every save-interval iters '
                             '(default=10000)')
    parser.add_argument('--state', default=None,
                        help='optimizer state. resume to train the model with '
                             'the optimizer')
    parser.add_argument('-w', '--weight-decay', type=float, default=1e-4,
                        help='weight decay factor (default=1e-4)')
    args = parser.parse_args()

    print(argv2string(sys.argv) + '\n')
    for arg in dir(args):
        if arg[:1] == '_':
            continue
        print('{} = {}'.format(arg, getattr(args, arg)))
    print()

    if not os.path.isdir(args.outdir):
        os.makedirs(args.outdir)
        print('mkdir ' + args.outdir + '\n')

    model = Model(in_ch=3, out_ch=args.n_classes)
    if args.model is not None:
        S.load_npz(args.model, model)
    loss_func = Loss(model)

    # The original matched optimizer names with substring tests such as
    # `args.optim.lower() in 'sgd'`, which also accepts '', 's', etc.;
    # explicit membership tests are used instead.
    optim_name = args.optim.lower()
    if optim_name == 'sgd':
        if args.momentum > 0:
            optim = optims.CorrectedMomentumSGD(lr=args.learning_rate,
                                                momentum=args.momentum)
        else:
            optim = optims.SGD(lr=args.learning_rate)
    elif optim_name in ('nesterov', 'nesterovag'):
        optim = optims.NesterovAG(lr=args.learning_rate,
                                  momentum=args.momentum)
    elif optim_name == 'adam':
        optim = optims.Adam(alpha=args.learning_rate, beta1=args.momentum,
                            beta2=args.beta2,
                            weight_decay_rate=args.weight_decay, amsgrad=True)
    else:
        raise ValueError('Please specify an available optimizer name.\n'
                         'SGD, NesterovAG, and Adam are available.')
    print('{}\n'.format(type(optim)))
    optim.setup(model)
    if args.state is not None:
        S.load_npz(args.state, optim)
    if (args.weight_decay > 0) and not isinstance(optim, optims.Adam):
        optim.add_hook(WeightDecay(args.weight_decay))
    optim.add_hook(GradientClipping(1))

    # Decay the learning rate by 10x at 5/8 and 7/8 of the schedule.
    lr_decay_iter_dict = {
        int(5 * args.max_iter / 8): 0.1,
        int(7 * args.max_iter / 8): 0.1,
    }

    with open(args.train_data, 'r') as f:
        train_data_path_list = [line.strip() for line in f.readlines()]
    with open(args.train_labels, 'r') as f:
        train_labels_path_list = [line.strip() for line in f.readlines()]
    assert len(train_data_path_list) == len(train_labels_path_list)

    if (args.val_data is not None) or (args.val_labels is not None):
        if (args.val_data is not None) and (args.val_labels is not None):
            with open(args.val_data, 'r') as f:
                val_data_path_list = [line.strip() for line in f.readlines()]
            with open(args.val_labels, 'r') as f:
                val_labels_path_list = [line.strip() for line in f.readlines()]
            assert len(val_data_path_list) == len(val_labels_path_list)
        else:
            raise ValueError('Either val_data or val_labels is not specified.')

    train_queue = mp.Queue(maxsize=args.queue_maxsize)
    train_generator = BatchGenerator(args.batch_size, train_data_path_list,
                                     train_labels_path_list, train_queue,
                                     train=True, noise_injection=args.noise,
                                     out_height=512, out_width=512,
                                     max_height=1216, max_width=1216,
                                     min_height=832, min_width=832)
    train_generator.start()

    if args.val_data is None:
        val_queue = None
    else:
        val_queue = mp.Queue(maxsize=args.queue_maxsize)
        try:
            val_generator = BatchGenerator(1, val_data_path_list,
                                           val_labels_path_list, val_queue,
                                           train=False,
                                           out_height=608, out_width=968)
            val_generator.start()
        except Exception:
            train_generator.terminate()
            train_queue.close()
            val_queue.close()
            raise

    try:
        train(loss_func, optim, train_queue, args.max_iter, args.mean_interval,
              args.save_interval, val_queue, lr_decay_iter_dict, args.gpu_id,
              args.ignore_labels, args.outdir)
    except BaseException:
        train_generator.terminate()
        train_queue.close()
        if val_queue is not None:
            val_generator.terminate()
            val_queue.close()
        raise

    train_generator.terminate()
    train_queue.close()
    if val_queue is not None:
        val_generator.terminate()
        val_queue.close()
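# `lr_decay_iter_dict` maps an iteration number to a multiplicative factor for
# the learning rate. The `train()` loop that consumes it is not shown in this
# snippet, so the following is only a sketch under the assumption that the loop
# checks the dict once per iteration; `apply_lr_decay` is a hypothetical
# helper, not part of the original code.
def apply_lr_decay(optimizer, iteration, lr_decay_iter_dict):
    """Scale the optimizer's learning rate when `iteration` is a decay point."""
    factor = lr_decay_iter_dict.get(iteration)
    if factor is not None:
        hp = optimizer.hyperparam
        if hasattr(hp, 'alpha'):   # Adam-style optimizers expose `alpha`
            hp.alpha *= factor
        else:                      # SGD / NesterovAG expose `lr`
            hp.lr *= factor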
def run(input_dir, test_dir, output, batch_size, iterator='SerialIterator', device=-1, pretrained_model='', save_trigger=10000, test_trigger=1000, parser_module='XMLParser', train_module='MultiboxTrainChain', model_module='chainercv.links.SSD300'): pretrained_model = join(PROJECT_DIR, pretrained_model) if pretrained_model and os.path.isfile(pretrained_model): print('Pretrained model {} loaded.'.format(pretrained_model)) else: print('Pretrained model file not found, ' + 'using imagenet as default.') pretrained_model = 'imagenet' parser = _import_module('multibuildingdetector.parsers.{}', parser_module) model = _import_class(model_module)(n_fg_class=len(parser.LABEL_NAMES), pretrained_model=pretrained_model) model.use_preset('evaluate') train_chain = _import_class('multibuildingdetector.trainchains.{}' .format(train_module))(model) if device >= 0: chainer.cuda.get_device_from_id(device).use() model.to_gpu() train, test = load_train_test_set(input_dir, test_dir, parser) augmented_train = TransformDataset( train, ImageAugmentation(model.coder, model.insize, model.mean)) train_iter = getattr(chainer.iterators, iterator)(augmented_train, batch_size) optimizer = chainer.optimizers.Adam() optimizer.setup(train_chain) for param in train_chain.params(): if param.name == 'b': param.update_rule.add_hook(GradientScaling(2)) else: param.update_rule.add_hook(WeightDecay(0.0005)) if device is None: updater = chainer.training.StandardUpdater(train_iter, optimizer) else: updater = chainer.training.StandardUpdater(train_iter, optimizer, device=device) trainer = chainer.training.Trainer(updater, (120000, 'iteration'), output) log_fields = ['main/' + x for x in train_chain.loss_labels] if train_module == 'MultiboxTrainChain': test_iter = chainer.iterators.SerialIterator( test, batch_size, repeat=False, shuffle=False) trainer.extend( DetectionVOCEvaluator( test_iter, model, use_07_metric=True, label_names=parser.LABEL_NAMES), trigger=(test_trigger, 'iteration')) log_fields.append('validation/main/map') else: triplet_test = TransformDataset( test, ImageAugmentation(model.coder, model.insize, model.mean, augment=False)) test_iter = chainer.iterators.SerialIterator( triplet_test, batch_size, repeat=False, shuffle=False) trainer.extend( TripletEvaluator( test_iter, model, label_names=parser.LABEL_NAMES, save_plt=True, save_path=output), trigger=(test_trigger, 'iteration')) log_interval = 10, 'iteration' trainer.extend(extensions.LogReport(trigger=log_interval)) trainer.extend(extensions.observe_lr(), trigger=log_interval) trainer.extend(extensions.PrintReport( ['epoch', 'iteration', 'lr', *log_fields])) trainer.extend(extensions.snapshot_object( model, 'model_iter_{.updater.iteration}'), trigger=(save_trigger, 'iteration')) trainer.extend(extensions.ProgressBar(update_interval=10)) trainer.run()
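# `_import_module` and `_import_class` are referenced in run() above but are
# not defined in this snippet. A plausible implementation (an assumption, not
# the original helpers) resolves the dotted path with importlib:
import importlib

def _import_module(template, name):
    # e.g. _import_module('multibuildingdetector.parsers.{}', 'XMLParser')
    return importlib.import_module(template.format(name))

def _import_class(dotted_path):
    # e.g. _import_class('chainercv.links.SSD300') -> the SSD300 class
    module_path, class_name = dotted_path.rsplit('.', 1)
    return getattr(importlib.import_module(module_path), class_name)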
def main(): # Parameters epoch_max = 200 minibatch_max = 64 lambda_em = 1.0 gpu_id = 0 save_interval = 50 model_name = 'model' dataset_path = 'mnist.pklb' # Preparation of dataset # Change this logic for your dataset with open(dataset_path, 'rb') as f: mnist = pickle.Unpickler(f).load() ds_train = mnist['image']['train'] ds_train = [ np.asarray(x.reshape(-1), dtype='float32') / 255.0 for x in ds_train ] print('the number of dataset', len(ds_train)) # Setup of our model model = net.DDGMNet() if gpu_id >= 0: model.to_gpu(gpu_id) opt = optimizers.Adam(alpha=1e-3) opt.setup(model) opt.add_hook(WeightDecay(rate=1e-6)) # Learning loop for epoch_id in range(0, epoch_max): print('start epoch %d' % (epoch_id)) # Evaluation fake, _ = model.generate(mb_len=10) fake = cuda.to_cpu(fake.data) img_array = (np.hstack([x.reshape((28, 28)) for x in fake]) * 255).astype('uint8') Image.fromarray(img_array).save('%s_test_%d.png' % (model_name, epoch_id)) print('generated gen_test_%d.png' % (epoch_id)) sum_e_real = 0.0 sum_e_fake = 0.0 sum_entropy = 0.0 sum_trial = 0 order_dataset = np.random.permutation(len(ds_train)) for i in range(0, len(order_dataset), minibatch_max): # Energy calculation mb = [ model.xp.asarray(ds_train[j]) for j in order_dataset[i:i + minibatch_max] ] mb_len = len(mb) mb = chainer.variable.Variable(model.xp.vstack(mb), volatile=False) e_real = model.energy(mb, True) fake, entropy = model.generate(mb_len=mb_len) e_fake = model.energy(fake, True) # Gradient calculation with backprop model.zerograds() e_fake.backward() # changing the sign of gradinets related with the energy model model.scale_em_grads(-lambda_em) (lambda_em * e_real - entropy).backward() # go opt.update() # Outputing information print('%d r:%f f:%f e:%f' % (i, float( e_real.data), float(e_fake.data), float(entropy.data))) sum_e_real += float(e_real.data) * mb_len sum_e_fake += float(e_fake.data) * mb_len sum_entropy += float(entropy.data) * mb_len if epoch_id % save_interval == 0: serializers.save_npz('%s_%d' % (model_name, epoch_id), model) avr_e_real = sum_e_real / len(ds_train) avr_e_fake = sum_e_fake / len(ds_train) avr_entropy = sum_entropy / len(ds_train) with open('%s_log.txt' % (model_name), 'a') as f: f.writelines([ '%d\t%f\t%f\t%f\n' % (epoch_id, avr_e_real, avr_e_fake, avr_entropy) ]) serializers.save_npz('%s_%d' % (model_name, epoch_id), model) print('end')
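# In the loop above, `model.scale_em_grads(-lambda_em)` flips (and scales) the
# gradients that the fake-sample energy term accumulated on the energy model,
# so the later backward pass of `lambda_em * e_real - entropy` combines into
# the intended adversarial update. A minimal sketch of such a method, assuming
# the energy model is a sub-link reachable as `self.energy_model` (a
# hypothetical name, not taken from the original code):
def scale_em_grads(self, factor):
    """Multiply the accumulated gradients of the energy model's params in place."""
    for param in self.energy_model.params():
        if param.grad is not None:
            param.grad *= factor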
def main(): model_cfgs = { 'resnet50': { 'class': ResNet50, 'score_layer_name': 'fc6', 'kwargs': { 'arch': 'fb' } }, 'resnet101': { 'class': ResNet101, 'score_layer_name': 'fc6', 'kwargs': { 'arch': 'fb' } }, 'resnet152': { 'class': ResNet152, 'score_layer_name': 'fc6', 'kwargs': { 'arch': 'fb' } } } parser = argparse.ArgumentParser( description='Learning convnet from ILSVRC2012 dataset') parser.add_argument('train', help='Path to root of the train dataset') parser.add_argument('val', help='Path to root of the validation dataset') parser.add_argument('--model', '-m', choices=model_cfgs.keys(), default='resnet50', help='Convnet models') parser.add_argument('--communicator', type=str, default='pure_nccl', help='Type of communicator') parser.add_argument('--loaderjob', type=int, default=4) parser.add_argument('--batchsize', type=int, default=32, help='Batch size for each worker') parser.add_argument('--lr', type=float) parser.add_argument('--momentum', type=float, default=0.9) parser.add_argument('--weight_decay', type=float, default=0.0001) parser.add_argument('--out', type=str, default='result') parser.add_argument('--epoch', type=int, default=90) args = parser.parse_args() # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator if hasattr(multiprocessing, 'set_start_method'): multiprocessing.set_start_method('forkserver') p = multiprocessing.Process() p.start() p.join() comm = chainermn.create_communicator(args.communicator) device = comm.intra_rank if args.lr is not None: lr = args.lr else: lr = 0.1 * (args.batchsize * comm.size) / 256 if comm.rank == 0: print('lr={}: lr is selected based on the linear ' 'scaling rule'.format(lr)) label_names = directory_parsing_label_names(args.train) model_cfg = model_cfgs[args.model] extractor = model_cfg['class'](n_class=len(label_names), **model_cfg['kwargs']) extractor.pick = model_cfg['score_layer_name'] model = Classifier(extractor) # Following https://arxiv.org/pdf/1706.02677.pdf, # the gamma of the last BN of each resblock is initialized by zeros. 
for l in model.links(): if isinstance(l, Bottleneck): l.conv3.bn.gamma.data[:] = 0 train_data = DirectoryParsingLabelDataset(args.train) val_data = DirectoryParsingLabelDataset(args.val) train_data = TransformDataset(train_data, ('img', 'label'), TrainTransform(extractor.mean)) val_data = TransformDataset(val_data, ('img', 'label'), ValTransform(extractor.mean)) print('finished loading dataset') if comm.rank == 0: train_indices = np.arange(len(train_data)) val_indices = np.arange(len(val_data)) else: train_indices = None val_indices = None train_indices = chainermn.scatter_dataset(train_indices, comm, shuffle=True) val_indices = chainermn.scatter_dataset(val_indices, comm, shuffle=True) train_data = train_data.slice[train_indices] val_data = val_data.slice[val_indices] train_iter = chainer.iterators.MultiprocessIterator( train_data, args.batchsize, n_processes=args.loaderjob) val_iter = iterators.MultiprocessIterator(val_data, args.batchsize, repeat=False, shuffle=False, n_processes=args.loaderjob) optimizer = chainermn.create_multi_node_optimizer( CorrectedMomentumSGD(lr=lr, momentum=args.momentum), comm) optimizer.setup(model) for param in model.params(): if param.name not in ('beta', 'gamma'): param.update_rule.add_hook(WeightDecay(args.weight_decay)) if device >= 0: chainer.cuda.get_device(device).use() model.to_gpu() updater = chainer.training.StandardUpdater(train_iter, optimizer, device=device) trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out) @make_shift('lr') def warmup_and_exponential_shift(trainer): epoch = trainer.updater.epoch_detail warmup_epoch = 5 if epoch < warmup_epoch: if lr > 0.1: warmup_rate = 0.1 / lr rate = warmup_rate \ + (1 - warmup_rate) * epoch / warmup_epoch else: rate = 1 elif epoch < 30: rate = 1 elif epoch < 60: rate = 0.1 elif epoch < 80: rate = 0.01 else: rate = 0.001 return rate * lr trainer.extend(warmup_and_exponential_shift) evaluator = chainermn.create_multi_node_evaluator( extensions.Evaluator(val_iter, model, device=device), comm) trainer.extend(evaluator, trigger=(1, 'epoch')) log_interval = 0.1, 'epoch' print_interval = 0.1, 'epoch' if comm.rank == 0: trainer.extend(chainer.training.extensions.observe_lr(), trigger=log_interval) trainer.extend(extensions.snapshot_object( extractor, 'snapshot_model_{.updater.epoch}.npz'), trigger=(args.epoch, 'epoch')) trainer.extend(extensions.LogReport(trigger=log_interval)) trainer.extend(extensions.PrintReport([ 'iteration', 'epoch', 'elapsed_time', 'lr', 'main/loss', 'validation/main/loss', 'main/accuracy', 'validation/main/accuracy' ]), trigger=print_interval) trainer.extend(extensions.ProgressBar(update_interval=10)) trainer.run()
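# A quick check of the warmup/step schedule defined above, rewritten as a
# standalone function of (epoch, base lr) only; the numbers follow directly
# from the code (5-epoch linear warmup when lr > 0.1, then 10x steps at
# epochs 30, 60 and 80).
def scheduled_lr(epoch, lr):
    warmup_epoch = 5
    if epoch < warmup_epoch:
        if lr > 0.1:
            warmup_rate = 0.1 / lr
            rate = warmup_rate + (1 - warmup_rate) * epoch / warmup_epoch
        else:
            rate = 1
    elif epoch < 30:
        rate = 1
    elif epoch < 60:
        rate = 0.1
    elif epoch < 80:
        rate = 0.01
    else:
        rate = 0.001
    return rate * lr

assert abs(scheduled_lr(0, 0.4) - 0.1) < 1e-9     # warmup starts at 0.1
assert abs(scheduled_lr(65, 0.4) - 0.004) < 1e-9  # after the second decay step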
def main(args): if not os.path.exists(args.dir_model): os.mkdir(args.dir_model) log_file_name = os.path.join(args.dir_model, 'log.txt') args.store() dataset_train = get_dataset(args.dataset_train_path) print('The number of datasets:') print('dataset_train: %d' % (len(dataset_train))) # ToDo: loading a model from dir_base_model if args.gpuid >= 0: cuda.get_device(args.gpuid).use() target = NeuralModel() if args.gpuid >= 0: target.to_gpu(args.gpuid) target_opt = chainer.optimizers.SGD(lr=args.train_sgd_lr) target_opt.setup(target) target_opt.add_hook(WeightDecay(rate=args.train_weight_decay_rate)) target_opt.add_hook(GradientClipping(args.train_gradient_clipping_norm)) def save_target(save_path): target.to_cpu() serializers.save_npz(save_path, target) if args.gpuid >= 0: target.to_gpu(args.gpuid) def save_charvec(save_path): target.to_cpu() charvec_dict = {} for v, k in dataset_train: mu, ln_var = target.encode(v[None, :, :]) charvec_dict[k] = mu.data[0] with open(save_path, 'wb') as f: pickle.Pickler(f).dump(charvec_dict) if not (args.decode_samples is None): for i in range(len(args.decode_samples)): c = args.decode_samples[i] array = (255 * target.decode( charvec_dict[c][None, :]).data[0]).astype('uint8') img_size = array.shape[0] array = np.broadcast_to(array[:, :, None], (img_size, img_size, 3)) Image.fromarray(array).save(save_path + str(i) + '.bmp') if args.gpuid >= 0: target.to_gpu(args.gpuid) print('start training') for ep_id in range(args.train_max_epoch): np.random.shuffle(dataset_train) C = args.kld_function[ep_id] epoch_loss = 0 epoch_rec_loss = 0 epoch_kld_loss = 0 for mb_id in range(0, len(dataset_train), args.minibatch_size_train): mb = dataset_train[mb_id:mb_id + args.minibatch_size_train] mb = [v for v, k in mb] x = target.xp.asarray(np.stack(mb)) loss, rec_loss, kld_loss = target(x, C=C) target.zerograds() loss.backward() target_opt.update() loss.unchain_backward() epoch_loss += float(loss.data) * len(mb) epoch_rec_loss += rec_loss * len(mb) epoch_kld_loss += kld_loss * len(mb) epoch_loss /= len(dataset_train) epoch_rec_loss /= len(dataset_train) epoch_kld_loss /= len(dataset_train) record = (ep_id, C, epoch_loss, epoch_rec_loss, epoch_kld_loss) print('ep=%d C=%.4f loss=%.4f rec_loss=%.4f kld_loss=%.4f' % record) with open(log_file_name, 'a') as f: f.write('ep=%d C=%.4f loss=%.4f rec_loss=%.4f kld_loss=%.4f\n' % record) # MODEL STORING if ep_id % args.save_each == 0: save_target( os.path.join(args.dir_model, 'trained_%d.model' % (ep_id))) save_charvec( os.path.join(args.dir_model, 'charvec_%d.pklb' % (ep_id))) print('model saved.') print('training done.') save_target(os.path.join(args.dir_model, 'trained_end.model')) save_charvec(os.path.join(args.dir_model, 'charvec_end.pklb')) print('the last model saved.')
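# `args.kld_function` above is indexed by epoch to get the per-epoch value of
# C (the weight/capacity of the KL term in the VAE loss). How that sequence is
# built is not shown in this snippet; a common choice is a linear ramp,
# sketched below as a hypothetical helper (`linear_kld_schedule` is not part
# of the original code).
def linear_kld_schedule(max_epoch, c_start=0.0, c_end=1.0, warmup_epochs=None):
    """Return a list of length `max_epoch` ramping C linearly, then constant."""
    if warmup_epochs is None:
        warmup_epochs = max_epoch
    schedule = []
    for ep in range(max_epoch):
        t = min(ep / max(warmup_epochs - 1, 1), 1.0)
        schedule.append(c_start + (c_end - c_start) * t)
    return schedule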
0.36202242, 0.05815253, 0.24430117, 0.75336765, 0.53273125, 0.90717045, 0.26057194, 0.17050579, 0.26934674, 0.69990416 ], [ 0.77141326, 0.23113243, 0.02778885, 0.35061881, 0.50881063, 0.2445026, 0.87910554, 0.58546545, 0.59878369, 0.03310525 ], [ 0.862771, 0.54340754, 0.79409784, 0.94202909, 0.12964679, 0.34659084, 0.18709705, 0.32934376, 0.69122394, 0.65063928 ], [ 0.60503851, 0.44435167, 0.96214351, 0.28996983, 0.99434833, 0.89644887, 0.11432005, 0.65593003, 0.15861048, 0.51386829 ]]
gold = Variable(xp.array([0, 0, 1, 1, 1], dtype=xp.int32))
discriminator = discriminator(10, dropout=0.0)
optimizer = optimizers.AdaGrad()
optimizer.use_cleargrads()
optimizer.setup(discriminator)
optimizer.add_hook(WeightDecay(0.0001))
for i in range(1, args.epoch + 1):
    system_output = discriminator(h)
    loss = F.softmax_cross_entropy(system_output, gold)
    # The original called `adversarial.cleargrads()`, but `adversarial` is not
    # defined in this snippet and the optimizer is set up on `discriminator`,
    # so clear the discriminator's gradients before the backward pass.
    discriminator.cleargrads()
    loss.backward()
    optimizer.update()
    print(loss, F.argmax(system_output, axis=1))
def set_optimizer(model):
    optimizer = optimizers.AdaGrad()
    optimizer.use_cleargrads()
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(0.0001))
    return optimizer
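# Example usage of the helper above; `SomeChain` stands in for any
# chainer.Chain (the concrete model class is not defined in this snippet).
# model = SomeChain()
# optimizer = set_optimizer(model)
# ... then drive updates manually (model.cleargrads(); loss.backward();
# optimizer.update()) or pass `optimizer` to a StandardUpdater.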