def main():
    """Train a word2vec model (skip-gram or CBOW) on a list of wakati corpora.

    Parses command-line options, builds the vocabulary from the corpora,
    streams word ids, trains with the chosen output layer (hierarchical
    softmax, negative sampling, or plain softmax cross-entropy) and writes
    the embeddings to ``word2vec.model`` in word2vec text format.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', default=-1, type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--unit', '-u', default=100, type=int,
                        help='number of units')
    parser.add_argument('--window', '-w', default=5, type=int,
                        help='window size')
    parser.add_argument('--batchsize', '-b', type=int, default=1000,
                        help='learning minibatch size')
    parser.add_argument('--epoch', '-e', default=20, type=int,
                        help='number of epochs to learn')
    parser.add_argument('--model', '-m', choices=['skipgram', 'cbow'],
                        default='skipgram',
                        help='model type ("skipgram", "cbow")')
    parser.add_argument('--negative-size', default=5, type=int,
                        help='number of negative samples')
    parser.add_argument('--out-type', '-o', choices=['hsm', 'ns', 'original'],
                        default='hsm',
                        help='output model type ("hsm": hierarchical softmax, '
                             '"ns": negative sampling, "original": '
                             'no approximation)')
    parser.add_argument('--out', default='result',
                        help='Directory to output the result')
    parser.add_argument('--test', dest='test', action='store_true')
    parser.add_argument('--wakati_corpus_list')
    parser.add_argument('--num_tokens', type=int, default=None,
                        help='If not set, we count words as the 1st-pass.')
    parser.add_argument('--word_count_threshold', default=5, type=int)
    parser.set_defaults(test=False)
    args = parser.parse_args()

    if args.gpu >= 0:
        # Select the device once (the original repeated this block twice).
        chainer.cuda.get_device_from_id(args.gpu).use()
        cuda.check_cuda_available()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('Window: {}'.format(args.window))
    print('Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('Training model: {}'.format(args.model))
    print('Output type: {}'.format(args.out_type))
    print('')

    # Read the corpus list, skipping comment lines (leading '#').
    # Use a context manager so the file is closed, and a raw string for the
    # regex (``'\s'`` in a plain string is an invalid escape).
    with open(args.wakati_corpus_list, 'r') as f:
        wakati_corpus_list = [line.rstrip() for line in f
                              if not re.match(r'^\s*#', line)]

    # Create vocab.
    vocab = word2vec_module.create_vocab(
        wakati_corpus_list, count_threshold=args.word_count_threshold)
    index2word = {wid: word for word, wid in vocab.items()}

    # Load the dataset
    words_generator = word2vec_module.WordsGenerator(wakati_corpus_list,
                                                     batch_size=1000)

    class WidsGenerator:
        """Yields lists of word ids; out-of-vocabulary words map to id 0."""

        def __init__(self, words_generator, vocab):
            self.words_generator = words_generator
            self.vocab = vocab

        def __call__(self):
            for words in self.words_generator():
                # Use the stored vocab; the original accidentally closed over
                # the enclosing function's ``vocab`` variable.
                yield [self.vocab.get(word, 0) for word in words]

    class WidGenerator:
        """Flattens a WidsGenerator into a stream of single word ids."""

        def __init__(self, wids_generator):
            self.wids_generator = wids_generator

        def __call__(self):
            for wids in self.wids_generator():
                for wid in wids:
                    yield wid

    # Each generator call returns a fresh iterator over the corpora.
    wids_generator = WidsGenerator(words_generator, vocab)
    wid_generator = WidGenerator(wids_generator)

    # Count tokens in a streaming fashion unless the caller supplied a count.
    num_tokens = (sum(1 for _ in wid_generator())
                  if args.num_tokens is None else args.num_tokens)
    # The first 5% of the stream (capped at 10000 items) is held out for
    # validation.  NOTE(review): the slices below are applied to the
    # *sentence* stream (wids_generator), so the cap counts sentences rather
    # than tokens — kept as in the original, confirm intent.
    n_val = min(int(num_tokens * 0.05), 10000)

    counts = collections.Counter(wid_generator())
    n_vocab = len(vocab)
    print('n_vocab: %d' % n_vocab)

    if args.out_type == 'hsm':
        HSM = L.BinaryHierarchicalSoftmax
        tree = HSM.create_huffman_tree(counts)
        loss_func = HSM(args.unit, tree)
        loss_func.W.data[...] = 0
    elif args.out_type == 'ns':
        # Index over the full vocabulary: ``counts`` may be missing ids that
        # never occurred, so ``len(counts)`` can be smaller than ``n_vocab``
        # and would truncate the frequency table.
        cs = [counts[w] for w in range(n_vocab)]
        loss_func = L.NegativeSampling(args.unit, cs, args.negative_size)
        loss_func.W.data[...] = 0
    elif args.out_type == 'original':
        loss_func = SoftmaxCrossEntropyLoss(args.unit, n_vocab)
    else:
        raise Exception('Unknown output type: {}'.format(args.out_type))

    # Choose the model
    if args.model == 'skipgram':
        model = SkipGram(n_vocab, args.unit, loss_func)
    elif args.model == 'cbow':
        model = ContinuousBoW(n_vocab, args.unit, loss_func)
    else:
        raise Exception('Unknown model type: {}'.format(args.model))

    if args.gpu >= 0:
        model.to_gpu()

    # Set up an optimizer
    optimizer = O.Adam()
    optimizer.setup(model)

    # Set up iterators over a fresh pass of the generator: validation takes
    # the head of the stream, training the remainder.  (The original also
    # built an unused train/val pair from wid_generator() — dead code,
    # removed.)
    train = itertools.islice(wids_generator(), n_val, sys.maxsize)
    val = itertools.islice(wids_generator(), 0, n_val)
    train_iter = WindowIteratorIterator(train, args.window, args.batchsize)
    val_iter = WindowIteratorIterator(val, args.window, args.batchsize,
                                      repeat=False)

    # Set up an updater
    updater = training.updater.StandardUpdater(
        train_iter, optimizer, converter=convert, device=args.gpu)

    # Set up a trainer
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(extensions.Evaluator(
        val_iter, model, converter=convert, device=args.gpu))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss']))
    trainer.extend(extensions.ProgressBar())
    trainer.run()

    # Save the word2vec model: header "<vocab size> <dim>", then one
    # "<word> <vector>" line per vocabulary entry.
    with open('word2vec.model', 'w') as f:
        f.write('%d %d\n' % (len(index2word), args.unit))
        w = cuda.to_cpu(model.embed.W.data)
        for i, wi in enumerate(w):
            v = ' '.join(map(str, wi))
            f.write('%s %s\n' % (index2word[i], v))
def train(args):
    '''Run training.

    Builds an end-to-end ASR model (CTC / attention / hybrid) from the
    dimensions found in ``args.valid_json``, sets up single- or multi-GPU
    updaters and a Chainer trainer with evaluation, snapshotting, plotting
    and (for AdaDelta) epsilon-decay extensions, then runs the training loop.
    '''
    # display chainer version
    logging.info('chainer version = ' + chainer.__version__)

    # seed setting (chainer seed may not need it)
    os.environ['CHAINER_SEED'] = str(args.seed)
    logging.info('chainer seed = ' + os.environ['CHAINER_SEED'])

    # debug mode setting
    # 0 would be fastest, but 1 seems to be reasonable
    # by considering reproducibility
    # remove type check
    if args.debugmode < 2:
        chainer.config.type_check = False
        logging.info('chainer type check is disabled')
    # use deterministic computation or not
    if args.debugmode < 1:
        chainer.config.cudnn_deterministic = False
        logging.info('chainer cudnn deterministic is disabled')
    else:
        chainer.config.cudnn_deterministic = True

    # check cuda and cudnn availability
    if not chainer.cuda.available:
        logging.warning('cuda is not available')
    if not chainer.cuda.cudnn_enabled:
        logging.warning('cudnn is not available')

    # get input and output dimension info from the first validation utterance
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']
    utts = list(valid_json.keys())
    idim = int(valid_json[utts[0]]['input'][0]['shape'][1])
    odim = int(valid_json[utts[0]]['output'][0]['shape'][1])
    logging.info('#input dims : ' + str(idim))
    logging.info('#output dims: ' + str(odim))

    # check attention type
    if args.atype not in ['noatt', 'dot', 'location']:
        raise NotImplementedError(
            'chainer supports only noatt, dot, and location attention.')

    # specify attention, CTC, hybrid mode
    if args.mtlalpha == 1.0:
        mtl_mode = 'ctc'
        logging.info('Pure CTC mode')
    elif args.mtlalpha == 0.0:
        mtl_mode = 'att'
        logging.info('Pure attention mode')
    else:
        mtl_mode = 'mtl'
        logging.info('Multitask learning mode')

    # specify model architecture
    e2e = E2E(idim, odim, args)
    model = Loss(e2e, args.mtlalpha)

    # write model config
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    model_conf = args.outdir + '/model.json'
    with open(model_conf, 'wb') as f:
        logging.info('writing a model config file to ' + model_conf)
        f.write(json.dumps((idim, odim, vars(args)),
                           indent=4, sort_keys=True).encode('utf_8'))
    for key in sorted(vars(args).keys()):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    # Set gpu
    ngpu = args.ngpu
    if ngpu == 1:
        gpu_id = 0
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(gpu_id).use()
        model.to_gpu()  # Copy the model to the GPU
        logging.info('single gpu calculation.')
    elif ngpu > 1:
        gpu_id = 0
        devices = {'main': gpu_id}
        for gid in six.moves.xrange(1, ngpu):
            devices['sub_%d' % gid] = gid
        logging.info('multi gpu calculation (#gpus = %d).' % ngpu)
        logging.info('batch size is automatically increased (%d -> %d)' % (
            args.batch_size, args.batch_size * args.ngpu))
    else:
        gpu_id = -1
        logging.info('cpu calculation')

    # Setup an optimizer
    if args.opt == 'adadelta':
        optimizer = chainer.optimizers.AdaDelta(eps=args.eps)
    elif args.opt == 'adam':
        optimizer = chainer.optimizers.Adam()
    else:
        # The original fell through with ``optimizer`` unbound (NameError);
        # fail with an explicit message instead.
        raise ValueError('unknown optimizer: ' + args.opt)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.grad_clip))

    # read json data
    with open(args.train_json, 'rb') as f:
        train_json = json.load(f)['utts']
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']

    # set up training iterator and updater
    converter = CustomConverter(e2e.subsample[0])
    if ngpu <= 1:
        # make minibatch list (variable length)
        train = make_batchset(train_json, args.batch_size,
                              args.maxlen_in, args.maxlen_out,
                              args.minibatches)
        # hack to make batchsize argument as 1
        # actual batchsize is included in a list
        if args.n_iter_processes > 0:
            train_iter = chainer.iterators.MultiprocessIterator(
                TransformDataset(train, converter.transform),
                batch_size=1, n_processes=args.n_iter_processes,
                n_prefetch=8, maxtasksperchild=20)
        else:
            train_iter = chainer.iterators.SerialIterator(
                TransformDataset(train, converter.transform), batch_size=1)
        # set up updater
        updater = CustomUpdater(train_iter, optimizer,
                                converter=converter, device=gpu_id)
    else:
        # set up minibatches: round-robin split of utterances across GPUs
        train_subsets = []
        for gid in six.moves.xrange(ngpu):
            # make subset
            train_json_subset = {
                k: v for i, (k, v) in enumerate(train_json.items())
                if i % ngpu == gid
            }
            # make minibatch list (variable length)
            train_subsets += [
                make_batchset(train_json_subset, args.batch_size,
                              args.maxlen_in, args.maxlen_out,
                              args.minibatches)
            ]
        # each subset must have same length for MultiprocessParallelUpdater:
        # pad shorter subsets by repeating their leading batches
        maxlen = max([len(train_subset) for train_subset in train_subsets])
        for train_subset in train_subsets:
            if maxlen != len(train_subset):
                for i in six.moves.xrange(maxlen - len(train_subset)):
                    train_subset += [train_subset[i]]
        # hack to make batchsize argument as 1
        # actual batchsize is included in a list
        if args.n_iter_processes > 0:
            train_iters = [
                chainer.iterators.MultiprocessIterator(
                    TransformDataset(train_subsets[gid], converter.transform),
                    batch_size=1, n_processes=args.n_iter_processes,
                    n_prefetch=8, maxtasksperchild=20)
                for gid in six.moves.xrange(ngpu)
            ]
        else:
            train_iters = [
                chainer.iterators.SerialIterator(
                    TransformDataset(train_subsets[gid], converter.transform),
                    batch_size=1)
                for gid in six.moves.xrange(ngpu)
            ]
        # set up updater
        updater = CustomParallelUpdater(train_iters, optimizer,
                                        converter=converter, devices=devices)

    # Set up a trainer
    trainer = training.Trainer(updater, (args.epochs, 'epoch'),
                               out=args.outdir)

    # Resume from a snapshot
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # set up validation iterator
    valid = make_batchset(valid_json, args.batch_size,
                          args.maxlen_in, args.maxlen_out, args.minibatches)
    if args.n_iter_processes > 0:
        valid_iter = chainer.iterators.MultiprocessIterator(
            TransformDataset(valid, converter.transform),
            batch_size=1, repeat=False, shuffle=False,
            n_processes=args.n_iter_processes,
            n_prefetch=8, maxtasksperchild=20)
    else:
        valid_iter = chainer.iterators.SerialIterator(
            TransformDataset(valid, converter.transform),
            batch_size=1, repeat=False, shuffle=False)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(valid_iter, model,
                                        converter=converter, device=gpu_id))

    # Save attention weight each epoch (only when attention is in use)
    if args.num_save_attention > 0 and args.mtlalpha != 1.0:
        data = sorted(list(valid_json.items())[:args.num_save_attention],
                      key=lambda x: int(x[1]['input'][0]['shape'][1]),
                      reverse=True)
        if hasattr(model, "module"):
            att_vis_fn = model.module.predictor.calculate_all_attentions
        else:
            att_vis_fn = model.predictor.calculate_all_attentions
        trainer.extend(PlotAttentionReport(att_vis_fn, data,
                                           args.outdir + "/att_ws",
                                           converter=converter,
                                           device=gpu_id),
                       trigger=(1, 'epoch'))

    # Take a snapshot for each specified epoch
    trainer.extend(
        extensions.snapshot(filename='snapshot.ep.{.updater.epoch}'),
        trigger=(1, 'epoch'))

    # Make a plot for training and validation values
    trainer.extend(
        extensions.PlotReport([
            'main/loss', 'validation/main/loss',
            'main/loss_ctc', 'validation/main/loss_ctc',
            'main/loss_att', 'validation/main/loss_att'
        ], 'epoch', file_name='loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/acc', 'validation/main/acc'],
                              'epoch', file_name='acc.png'))

    # Save best models.  Accuracy is only reported when attention is active,
    # so skip the acc-based snapshot in pure-CTC mode.
    # (Fixed: the original compared strings with ``is not``, which relies on
    # CPython string interning and is a SyntaxWarning on modern Pythons.)
    trainer.extend(
        extensions.snapshot_object(model, 'model.loss.best'),
        trigger=training.triggers.MinValueTrigger('validation/main/loss'))
    if mtl_mode != 'ctc':
        trainer.extend(
            extensions.snapshot_object(model, 'model.acc.best'),
            trigger=training.triggers.MaxValueTrigger('validation/main/acc'))

    # epsilon decay in the optimizer: when the monitored metric stops
    # improving, restore the best snapshot and shrink AdaDelta's eps
    if args.opt == 'adadelta':
        if args.criterion == 'acc' and mtl_mode != 'ctc':
            trainer.extend(restore_snapshot(model,
                                            args.outdir + '/model.acc.best'),
                           trigger=CompareValueTrigger(
                               'validation/main/acc',
                               lambda best_value, current_value:
                               best_value > current_value))
            trainer.extend(adadelta_eps_decay(args.eps_decay),
                           trigger=CompareValueTrigger(
                               'validation/main/acc',
                               lambda best_value, current_value:
                               best_value > current_value))
        elif args.criterion == 'loss':
            trainer.extend(restore_snapshot(model,
                                            args.outdir + '/model.loss.best'),
                           trigger=CompareValueTrigger(
                               'validation/main/loss',
                               lambda best_value, current_value:
                               best_value < current_value))
            trainer.extend(adadelta_eps_decay(args.eps_decay),
                           trigger=CompareValueTrigger(
                               'validation/main/loss',
                               lambda best_value, current_value:
                               best_value < current_value))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport(trigger=(REPORT_INTERVAL,
                                                 'iteration')))
    report_keys = [
        'epoch', 'iteration', 'main/loss', 'main/loss_ctc', 'main/loss_att',
        'validation/main/loss', 'validation/main/loss_ctc',
        'validation/main/loss_att', 'main/acc', 'validation/main/acc',
        'elapsed_time'
    ]
    if args.opt == 'adadelta':
        trainer.extend(extensions.observe_value(
            'eps',
            lambda trainer: trainer.updater.get_optimizer('main').eps),
            trigger=(REPORT_INTERVAL, 'iteration'))
        report_keys.append('eps')
    trainer.extend(extensions.PrintReport(report_keys),
                   trigger=(REPORT_INTERVAL, 'iteration'))
    trainer.extend(extensions.ProgressBar(update_interval=REPORT_INTERVAL))

    # Run the training
    trainer.run()
# Build the network and wrap it in a Classifier, which adds the loss and
# accuracy computation on top of the raw logits.
model = ConvNet(input_size, hidden_size, num_classes)
model = L.Classifier(
    model)  # L.Classifier abstracts softmax and crossentropy loss.
if device == 'gpu':
    model.to_gpu(device_id)

# Plain SGD with a fixed learning rate.
optimizer = chainer.optimizers.SGD(lr=learning_rate)
optimizer.setup(model)

updater = training.StandardUpdater(
    train_iter, optimizer,
    device=device_id)  # training data is transported to device.
trainer = training.Trainer(updater,
                           stop_trigger=(num_epochs, 'epoch'),
                           out='mnist_result')
trainer.extend(extensions.LogReport(
))  # log reports. reports are given by Reporter's instances in trainer.
# Evaluate the model using test_iter as validation data at the end of
# training (the trigger fires once, at epoch ``num_epochs``).
trainer.extend(extensions.Evaluator(test_iter, model, device=device_id),
               trigger=(num_epochs, 'epoch'))
trainer.extend(
    extensions.PrintReport([
        'epoch', 'main/loss', 'main/accuracy', 'validation/main/loss',
        'validation/main/accuracy'
    ]))  # print statistics in every epoch.
# trainer.extend(extensions.PlotReport(['main/loss', 'validation/main/loss'], x_key='epoch', file_name='loss.png'))
# trainer.extend(extensions.PlotReport(['main/accuracy', 'validation/main/accuracy'], x_key='epoch', file_name='accuracy.png'))
# trainer.extend(extensions.dump_graph('main/loss'))
trainer.run()
shuffle=False)  # tail of an iterator constructed above this chunk

# Define optimizers
optimizer.setup(model)

# Give the iterators and optimizers to updater
updater = training.StandardUpdater(train_iter, optimizer)

# Give trigger for early stopping: stop when validation loss stops
# improving, or after n_epoch epochs at the latest.
stop_trigger = training.triggers.EarlyStoppingTrigger(
    monitor='validation/main/loss', max_trigger=(n_epoch, 'epoch'))

# Give updater to trainer
trainer = training.Trainer(updater, stop_trigger)
trainer.extend(extensions.Evaluator(test_iter, model))
trainer.extend(extensions.dump_graph('main/loss'))
trainer.extend(extensions.LogReport())
trainer.extend(
    extensions.PlotReport(['main/loss', 'validation/main/loss'],
                          file_name='loss.png'))
trainer.extend(
    extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss', 'elapsed_time']))

# Train the model
trainer.run()

# Save trained model
serializers.save_npz('my.model', model)
def main(args):
    """Train (or, with ``args.test``, only evaluate) an image classifier.

    Builds the architecture named by ``args.arch``, optionally loads
    fine-tuning / initial weights, trains with a Chainer trainer, and
    returns the final evaluation results dict (with ``'outputdir'`` added).
    """
    # Initialize the model to train
    model = models.archs[args.arch]()
    if args.finetune and hasattr(model, 'finetuned_model_path'):
        utils.finetuning.load_param(model.finetuned_model_path, model,
                                    args.ignore)
        #model.finetune = True
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    # Output directory is timestamped; in test mode reuse the directory of
    # the initial model instead.
    nowt = datetime.datetime.today()
    outputdir = args.out + '/' + args.arch + '/' + nowt.strftime(
        "%Y%m%d-%H%M") + '_bs' + str(args.batchsize)
    if args.test and args.initmodel is not None:
        outputdir = os.path.dirname(args.initmodel)

    # Load the datasets and mean file
    mean = None
    if hasattr(model, 'mean_value'):
        mean = makeMeanImage(model.mean_value)
    else:
        mean = np.load(args.mean)
    assert mean is not None

    train = ppds.PreprocessedDataset(args.train, args.root, mean,
                                     model.insize)
    val = ppds.PreprocessedDataset(args.val, args.root, mean, model.insize,
                                   False)
    # These iterators load the images with subprocesses running in parallel to
    # the training/validation.
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, shuffle=False, n_processes=args.loaderjob)
    #val_iter = chainer.iterators.MultiprocessIterator(
    #    val, args.val_batchsize, repeat=False, shuffle=False, n_processes=args.loaderjob)
    val_iter = chainer.iterators.SerialIterator(val, args.val_batchsize,
                                                repeat=False, shuffle=False)

    # Set up an optimizer; lr/momentum are only set when the chosen
    # optimizer actually exposes those attributes.
    optimizer = optimizers[args.opt]()
    #if args.opt == 'momentumsgd':
    if hasattr(optimizer, 'lr'):
        optimizer.lr = args.baselr
    if hasattr(optimizer, 'momentum'):
        optimizer.momentum = args.momentum
    optimizer.setup(model)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), outputdir)

    # Short intervals in test mode so a dry run exercises every extension.
    #val_interval = (10 if args.test else int(len(train) / args.batchsize)), 'iteration'
    val_interval = (10, 'iteration') if args.test else (1, 'epoch')
    snapshot_interval = (10, 'iteration') if args.test else (4, 'epoch')
    log_interval = (10 if args.test else 200), 'iteration'

    # Copy the chain with shared parameters to flip 'train' flag only in test
    eval_model = model.copy()
    eval_model.train = False
    if not args.test:
        val_evaluator = extensions.Evaluator(val_iter, eval_model,
                                             device=args.gpu)
    else:
        val_evaluator = utils.EvaluatorPlus(val_iter, eval_model,
                                            device=args.gpu)
        if 'googlenet' in args.arch:
            val_evaluator.lastname = 'validation/main/loss3'
    trainer.extend(val_evaluator, trigger=val_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
        trigger=(500, 'iteration'))
    # Be careful to pass the interval directly to LogReport
    # (it determines when to emit log rather than when to read observations)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy',
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.opt == 'momentumsgd':
        trainer.extend(extensions.ExponentialShift('lr', args.gamma),
                       trigger=(1, 'epoch'))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)
    # NOTE(review): nesting reconstructed from a collapsed source — training
    # and the model saves appear to run only when not in test mode; confirm.
    if not args.test:
        chainer.serializers.save_npz(outputdir + '/model0', model)
        trainer.run()
        chainer.serializers.save_npz(outputdir + '/model', model)
        with open(outputdir + '/args.txt', 'w') as o:
            print(args, file=o)

    results = val_evaluator(trainer)
    results['outputdir'] = outputdir

    if args.test:
        print(val_evaluator.confmat)
        categories = utils.io.load_categories(args.categories)
        confmat_csv_name = args.initmodel + '.csv'
        confmat_fig_name = args.initmodel + '.eps'
        utils.io.save_confmat_csv(confmat_csv_name, val_evaluator.confmat,
                                  categories)
        utils.io.save_confmat_fig(confmat_fig_name, val_evaluator.confmat,
                                  categories, mode="rate", saveFormat="eps")
    return results
def main():
    """Entry point of the ChainerMN MNIST example (data-parallel MLP)."""
    parser = argparse.ArgumentParser(description='ChainerMN example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--communicator', type=str, default='pure_nccl',
                        help='Type of communicator')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', action='store_true', help='Use GPU')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    args = parser.parse_args()

    # Prepare the ChainerMN communicator.  CPU-only runs must use the
    # 'naive' communicator; GPU runs must not.
    if not args.gpu:
        if args.communicator != 'naive':
            print('Warning: using naive communicator '
                  'because only naive supports CPU-only execution')
        comm = chainermn.create_communicator('naive')
        dev_id = -1
    else:
        if args.communicator == 'naive':
            print('Error: \'naive\' communicator does not support GPU.\n')
            exit(-1)
        comm = chainermn.create_communicator(args.communicator)
        dev_id = comm.intra_rank

    is_master = comm.rank == 0
    if is_master:
        banner = '=========================================='
        print(banner)
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        if args.gpu:
            print('Using GPUs')
        print('Using {} communicator'.format(args.communicator))
        print('Num unit: {}'.format(args.unit))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print(banner)

    classifier = L.Classifier(MLP(args.unit, 10))
    if dev_id >= 0:
        chainer.cuda.get_device_from_id(dev_id).use()
        classifier.to_gpu()

    # A multi node optimizer wraps a standard Chainer optimizer so that
    # gradients are aggregated across workers.
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), comm)
    optimizer.setup(classifier)

    # Split and distribute the dataset.  Only worker 0 loads the whole
    # dataset; its shards are then scattered evenly to all workers.
    train_data, test_data = (chainer.datasets.get_mnist()
                             if is_master else (None, None))
    train_data = chainermn.scatter_dataset(train_data, comm, shuffle=True)
    test_data = chainermn.scatter_dataset(test_data, comm, shuffle=True)

    train_iter = chainer.iterators.SerialIterator(train_data, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(
        test_data, args.batchsize, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=dev_id)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Wrap the evaluator so validation statistics are reduced over workers.
    trainer.extend(chainermn.create_multi_node_evaluator(
        extensions.Evaluator(test_iter, classifier, device=dev_id), comm))

    # Display/output extensions run on one worker only, otherwise every
    # process would print the same report.
    if is_master:
        trainer.extend(extensions.DumpGraph('main/loss'))
        trainer.extend(extensions.LogReport())
        trainer.extend(
            extensions.PrintReport([
                'epoch', 'main/loss', 'validation/main/loss',
                'main/accuracy', 'validation/main/accuracy', 'elapsed_time'
            ]))
        trainer.extend(extensions.ProgressBar())

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Train a configurable classifier (MLP/CNN/VGG) on Fashion-MNIST.

    Parses hyper-parameters from the command line, applies normalization
    and augmentation transforms, and runs a Chainer trainer with standard
    reporting/plotting extensions.
    """
    parser = argparse.ArgumentParser(
        description='Chainer example: Fashion-MNIST')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1024,
                        help='Number of units')
    parser.add_argument('--optimizer', '-op',
                        choices=('SGD', 'MomentumSGD', 'NesterovAG',
                                 'AdaGrad', 'AdaDelta', 'RMSprop', 'Adam'),
                        default='MomentumSGD', help='optimization type')
    parser.add_argument('--model', '-m', choices=('MLP', 'CNN', 'VGG'),
                        default='MLP', help='model type')
    parser.add_argument('--activation', '-a',
                        choices=('sigmoid', 'tanh', 'relu', 'leaky_relu',
                                 'elu'),
                        default='relu')
    parser.add_argument('--random_angle', type=float, default=15.0)
    parser.add_argument('--expand_ratio', type=float, default=1.2)
    parser.add_argument('--crop_size', type=int, nargs='*', default=[28, 28])
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('unit: {}'.format(args.unit))
    print('batch-size: {}'.format(args.batchsize))
    print('epoch: {}'.format(args.epoch))
    print('optimizer: {}'.format(args.optimizer))
    print('model type: {}'.format(args.model))
    print('activation: {}'.format(args.activation))
    print('')

    # Activation: table lookup instead of an if/elif ladder.  argparse's
    # ``choices`` guarantees the key exists (the original ladder left
    # ``activation`` unbound if no branch matched).
    activations = {
        'sigmoid': F.sigmoid,
        'tanh': F.tanh,
        'relu': F.relu,
        'leaky_relu': F.leaky_relu,
        'elu': F.elu,
    }
    activation = activations[args.activation]

    # Model
    if args.model == 'MLP':
        model = MLP(args.unit, 10, activation)
    elif args.model == 'CNN':
        model = CNN(10)
    else:  # 'VGG' -- the only remaining choice
        model = VGG(10)
    model = L.Classifier(model)
    if args.gpu >= 0:
        # get_device() is deprecated in Chainer in favor of
        # get_device_from_id().
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Optimizer: every allowed choice matches a class name in
    # chainer.optimizers, so instantiate it by name.
    optimizer = getattr(chainer.optimizers, args.optimizer)()
    optimizer.setup(model)

    # Load_Dataset
    train, test = load_dataset()

    # Output_Original_Images
    #output(train=train, file="./result/fmnist_original.png")
    #sys.exit()

    # Preprocess: per-channel mean/std computed over the training images,
    # used to normalize both splits.
    mean = np.mean([x for x, _ in train], axis=(0, 2, 3))
    std = np.std([x for x, _ in train], axis=(0, 2, 3))

    train_transform = partial(transform, mean=mean, std=std,
                              random_angle=args.random_angle,
                              crop_size=args.crop_size, train=True)
    test_transform = partial(transform, mean=mean, std=std, train=False)
    train = TransformDataset(train, train_transform)
    test = TransformDataset(test, test_transform)

    # Output_Transformed_Images
    #output(train=train, file="./result/fmnist_transform.png")
    #sys.exit()

    # Iterator
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Trainer
    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Extensions
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              'epoch', file_name='loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/accuracy', 'validation/main/accuracy'],
                              'epoch', file_name='accuracy.png'))
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # Run_Training
    trainer.run()
# NOTE(review): this chunk starts mid-function — the ``raise`` below is the
# final branch of an unseen if/elif chain selecting the model type; the
# statements after it sit at the enclosing function's level.
raise Exception('Unknown model type: {}'.format(args.model))

if args.gpu >= 0:
    model.to_gpu()

optimizer = O.Adam()
optimizer.setup(model)

# Sliding-window iterators over the train/validation word-id streams.
train_iter = WindowIterator(train, args.window, args.batchsize)
val_iter = WindowIterator(val, args.window, args.batchsize, repeat=False)
updater = training.StandardUpdater(train_iter, optimizer,
                                   converter=convert, device=args.gpu)
trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
trainer.extend(
    extensions.Evaluator(val_iter, model, converter=convert,
                         device=args.gpu))
trainer.extend(extensions.LogReport())
trainer.extend(
    extensions.PrintReport(['epoch', 'main/loss', 'validation/main/loss']))
trainer.extend(extensions.ProgressBar())
trainer.run()

# Write the embeddings in word2vec text format: a header line with the
# vocabulary size and dimensionality, then one "<word> <vector>" line each.
with open('word2vec.model', 'w') as f:
    f.write('%d %d\n' % (len(index2word), args.unit))
    w = cuda.to_cpu(model.embed.W.data)
    for i, wi in enumerate(w):
        v = ' '.join(map(str, wi))
        f.write('%s %s\n' % (index2word[i], v))
def main():
    """Train an ImageNet (ILSVRC2012) convnet with Chainer.

    Supports several architectures, NumPy/CuPy/ChainerX devices, an
    optional reduced-precision dtype, and an optional NVIDIA DALI input
    pipeline (GPU only).

    Fixes vs. previous revision:
    - removed leftover "ADDITIONAL TEST CODE" that unconditionally forced
      the validation/log intervals to (1, 'iteration') for *every* run,
      not only ``--test`` runs;
    - ``chainer.config.dtype`` is now set from the ``dtypes`` table
      (the previous code assigned the raw option string).
    """
    # Architecture name -> model class.
    archs = {
        'alex': alex.Alex,
        'googlenet': googlenet.GoogLeNet,
        'googlenetbn': googlenetbn.GoogLeNetBN,
        'nin': nin.NIN,
        'resnet50': resnet50.ResNet50,
        'resnext50': resnext50.ResNeXt50,
        'resnet50_nhwc': resnet50.ResNet50_Nhwc,
    }
    # dtype option name -> NumPy dtype.
    dtypes = {
        'float16': np.float16,
        'float32': np.float32,
        'float64': np.float64,
    }

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to training image-label list file')
    parser.add_argument('val', help='Path to validation image-label list file')
    parser.add_argument('--arch', '-a', choices=archs.keys(), default='nin',
                        help='Convnet architecture')
    parser.add_argument('--batchsize', '-B', type=int, default=32,
                        help='Learning minibatch size')
    parser.add_argument('--dtype', choices=dtypes,
                        help='Specify the dtype '
                             'used. If not supplied, the default dtype is used')
    parser.add_argument('--epoch', '-E', type=int, default=10,
                        help='Number of epochs to train')
    parser.add_argument('--device', '-d', type=str, default='-1',
                        help='Device specifier. Either ChainerX device '
                             'specifier or an integer. If non-negative '
                             'integer, CuPy arrays with specified device id '
                             'are used. If '
                             'negative integer, NumPy arrays are used')
    parser.add_argument('--initmodel',
                        help='Initialize the model from given file')
    parser.add_argument('--loaderjob', '-j', type=int,
                        help='Number of parallel data loading processes')
    parser.add_argument('--mean', '-m', default='mean.npy',
                        help='Mean file (computed by compute_mean.py)')
    parser.add_argument('--resume', '-r', default='',
                        help='Initialize the trainer from given file')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--root', '-R', default='.',
                        help='Root directory path of image files')
    parser.add_argument('--val_batchsize', '-b', type=int, default=250,
                        help='Validation minibatch size')
    parser.add_argument('--test', action='store_true')
    parser.set_defaults(test=False)
    parser.add_argument('--dali', action='store_true')
    parser.set_defaults(dali=False)
    group = parser.add_argument_group('deprecated arguments')
    group.add_argument('--gpu', '-g', dest='device',
                       type=int, nargs='?', const=0,
                       help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    device = chainer.get_device(args.device)

    # Set the dtype if supplied.  The option value is a *name* key into
    # `dtypes`; chainer.config.dtype expects the NumPy dtype itself.
    if args.dtype is not None:
        chainer.config.dtype = dtypes[args.dtype]

    print('Device: {}'.format(device))
    print('Dtype: {}'.format(chainer.config.dtype))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Initialize the model to train.
    model = archs[args.arch]()
    if args.initmodel:
        print('Load model from {}'.format(args.initmodel))
        chainer.serializers.load_npz(args.initmodel, model)
    model.to_device(device)
    device.use()

    # Load the mean file.
    mean = np.load(args.mean)

    if args.dali:
        # GPU-side input pipeline via NVIDIA DALI.
        if not dali_util._dali_available:
            raise RuntimeError('DALI seems not available on your system.')
        if device.xp is not chainer.backend.cuda.cupy:
            raise RuntimeError('Using DALI requires GPU device. Please '
                               'specify it with --device option.')
        n_threads = args.loaderjob
        if n_threads is None or n_threads <= 0:
            n_threads = 1
        ch_mean = list(np.average(mean, axis=(1, 2)))
        ch_std = [255.0, 255.0, 255.0]
        # Setup DALI pipelines.
        train_pipe = dali_util.DaliPipelineTrain(
            args.train, args.root, model.insize, args.batchsize, n_threads,
            device.device.id, True, mean=ch_mean, std=ch_std)
        val_pipe = dali_util.DaliPipelineVal(
            args.val, args.root, model.insize, args.val_batchsize, n_threads,
            device.device.id, False, mean=ch_mean, std=ch_std)
        train_iter = chainer.iterators.DaliIterator(train_pipe)
        val_iter = chainer.iterators.DaliIterator(val_pipe, repeat=False)
        converter = dali_util.DaliConverter(mean=mean, crop_size=model.insize)
    else:
        # Load the dataset files.  These iterators load the images with
        # subprocesses running in parallel to the training/validation.
        train = PreprocessedDataset(args.train, args.root, mean, model.insize)
        val = PreprocessedDataset(
            args.val, args.root, mean, model.insize, False)
        train_iter = chainer.iterators.MultiprocessIterator(
            train, args.batchsize, n_processes=args.loaderjob)
        val_iter = chainer.iterators.MultiprocessIterator(
            val, args.val_batchsize, repeat=False, n_processes=args.loaderjob)
        converter = dataset.concat_examples

    # Set up an optimizer.
    optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer.setup(model)

    # Set up a trainer.
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, converter=converter, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    val_interval = (100000, 'iteration')
    log_interval = (1000, 'iteration')
    if args.test:
        # Tight intervals so a smoke-test run exercises everything fast.
        val_interval = (1, 'iteration')
        log_interval = (1, 'iteration')

    trainer.extend(extensions.Evaluator(val_iter, model, converter=converter,
                                        device=device),
                   trigger=val_interval)
    # TODO(sonots): Temporarily disabled for chainerx. Fix it.
    if device.xp is not chainerx:
        trainer.extend(extensions.DumpGraph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    # Be careful to pass the interval directly to LogReport
    # (it determines when to emit log rather than when to read observations)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'lr'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
max_epoch = 15 model = L.Classifier(model) optimizer = optimizers.MomentumSGD(lr=0.01, momentum=0.9) optimizer.setup(model) is_decay_lr = False updater = training.updaters.StandardUpdater(train_iter, optimizer, device=gpu_id) result_dir = '../results/transfer_hard_{}_{}_{}_depth{}_valid{}'.format(mode, n_topic, iteration, sum(depth)*2+1, args.valid) trainer = training.Trainer(updater, (epoch_size * max_epoch, 'iteration'), out=result_dir) from chainer.training import extensions trainer.extend(extensions.LogReport(trigger=(epoch_size, 'iteration'))) trainer.extend(extensions.snapshot(filename='snapshot_iteration-{.updater.iteration}'), trigger=(epoch_size, 'iteration')) trainer.extend(extensions.snapshot_object(model.predictor, filename='model_iteration-{.updater.iteration}'), trigger=(epoch_size, 'iteration')) trainer.extend(extensions.Evaluator(test_iter, model, device=gpu_id), trigger=(epoch_size, 'iteration')) trainer.extend(extensions.observe_lr(), trigger=(epoch_size, 'iteration')) trainer.extend(extensions.PrintReport(['iteration', 'lr', 'main/accuracy', 'validation/main/accuracy', 'elapsed_time']), trigger=(epoch_size, 'iteration')) trainer.extend(extensions.dump_graph('main/loss')) trainer.extend(extensions.ExponentialShift('lr', 0.5), trigger=(epoch_size*3, 'iteration')) trainer.extend(extensions.ProgressBar(update_interval=30)) print('running') print('reslut_dir:{}'.format(result_dir)) trainer.run()
args.pca_sigma, args.random_angle, args.x_random_flip, args.y_random_flip, args.expand_ratio, args.random_crop_size, args.random_erase, args.output_size, args.batchsize, transform_with_softlabel) model_filename = None if args.caffe_model_path: model_filename = caffe2npz(args.caffe_model_path) elif args.model_file == 'models/resnet.py': model_filename = 'auto' model = create_model(args.model_file, args.model_name, n_class, model_filename, args.layers) net = DistillClassifier(model, lossfun_soft=softmax_cross_entropy_softlabel) evaluator = extensions.Evaluator(valid_iter, net, device=args.gpu_id) trainer = create_trainer(train_iter, net, args.gpu_id, args.initial_lr, args.weight_decay, args.freeze_layer, args.small_lr_layers, args.small_initial_lr, args.num_epochs_or_iter, args.epoch_or_iter, args.save_dir) if args.load_path: chainer.serializers.load_npz(args.load_path, trainer) trainer_extend(trainer, net, evaluator, args.small_lr_layers, args.lr_decay_rate, args.lr_decay_epoch, args.epoch_or_iter, args.save_trainer_interval) trainer.run()
def main():
    """Train a recursive neural network sentiment classifier on tree data.

    Parses command-line options, builds train/dev/test tree datasets
    (populating a shared vocabulary as a side effect), trains with
    AdaGrad plus weight decay, and finally evaluates on the test split.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', default=-1, type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--epoch', '-e', default=400, type=int,
                        help='number of epochs to learn')
    parser.add_argument('--unit', '-u', default=30, type=int,
                        help='number of units')
    parser.add_argument('--batchsize', '-b', type=int, default=25,
                        help='learning minibatch size')
    parser.add_argument('--label', '-l', type=int, default=5,
                        help='number of labels')
    parser.add_argument('--epocheval', '-p', type=int, default=5,
                        help='number of epochs per evaluation')
    parser.add_argument('--test', dest='test', action='store_true')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    # Cap the corpus size in test mode so a full run stays fast.
    max_size = 10 if args.test else None

    vocab = {}

    def _load_split(file_name):
        # convert_tree mutates `vocab` in place, so the splits must be
        # loaded in a fixed order: train first, then dev, then test.
        return [convert_tree(vocab, tree)
                for tree in data.read_corpus(file_name, max_size)]

    train_trees = _load_split('trees/train.txt')
    train_iter = chainer.iterators.SerialIterator(train_trees, args.batchsize)
    dev_trees = _load_split('trees/dev.txt')
    dev_iter = chainer.iterators.SerialIterator(
        dev_trees, args.batchsize, repeat=False, shuffle=False)
    test_trees = _load_split('trees/test.txt')

    model = RecursiveNet(len(vocab), args.unit, args.label)
    if args.gpu >= 0:
        model.to_gpu()

    # AdaGrad with a small weight-decay regularizer.
    optimizer = optimizers.AdaGrad(lr=0.1)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(0.0001))

    def _convert(batch, _):
        # Trees are consumed as-is; no device transfer happens here.
        return batch

    updater = chainer.training.StandardUpdater(
        train_iter, optimizer, device=args.gpu, converter=_convert)
    trainer = chainer.training.Trainer(updater, (args.epoch, 'epoch'))
    trainer.extend(
        extensions.Evaluator(dev_iter, model, device=args.gpu,
                             converter=_convert),
        trigger=(args.epocheval, 'epoch'))
    trainer.extend(extensions.LogReport())
    # Accuracy is reported as correct/total micro-averages.
    trainer.extend(
        extensions.MicroAverage('main/correct', 'main/total',
                                'main/accuracy'))
    trainer.extend(
        extensions.MicroAverage('validation/main/correct',
                                'validation/main/total',
                                'validation/main/accuracy'))
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))
    trainer.run()

    print('Test evaluation')
    evaluate(model, test_trees)
def main():
    """Train an MLP/ConvLSTM model on the Moving MNIST dataset.

    Sequences 0-6999 are used for training and 7000-9999 for evaluation.
    A trainer snapshot and a model snapshot are written every epoch, and
    the final model is saved as ``mymodel.npz`` inside the ``--out``
    directory (previously it was hard-coded to ``./result/mymodel.npz``,
    ignoring ``--out``; the default behaviour is unchanged).
    """
    import os  # for the final model path under args.out

    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=10,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=50,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=102400,
                        help='Number of units')
    parser.add_argument('--inf', type=int, default=10)
    parser.add_argument('--outf', type=int, default=10)
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    model = MLP()
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # First 7000 sequences train, the remaining 3000 evaluate.
    train = MovingMnistDataset(0, 7000)
    train_iter = chainer.iterators.SerialIterator(
        train, batch_size=args.batchsize, shuffle=True)
    test = MovingMnistDataset(7000, 10000)
    test_iter = chainer.iterators.SerialIterator(
        test, batch_size=args.batchsize, repeat=False, shuffle=False)

    # Setup an optimizer.
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # The model supplies its own loss function for training and eval.
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu,
        loss_func=model.get_loss_func())
    stop_trigger = (args.epoch, 'epoch')
    trainer = training.Trainer(updater, stop_trigger, out=args.out)
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu,
                                        eval_func=model.get_loss_func()),
                   name='val')
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/cross_entropy', 'elapsed_time',
         'val/main/cross_entropy']))
    trainer.extend(extensions.ProgressBar())
    log_interval = (1, 'epoch')
    trainer.extend(extensions.snapshot(), trigger=log_interval)
    trainer.extend(extensions.snapshot_object(
        model, filename='model_epoch-{.updater.epoch}'))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()

    # Save the final model inside the output directory (consistent with
    # --out; identical to the old path for the default 'result').
    chainer.serializers.save_npz(os.path.join(args.out, 'mymodel.npz'),
                                 model)
'label_dtype': np.int32, 'rgb_format': False } train_dataset = _preprocess_mnist(train_file, **preprocess_mnist_options) test_dataset = _preprocess_mnist(test_file, **preprocess_mnist_options) train_iter = chainer.iterators.SerialIterator(train_dataset, args.batch_size) test_iter = chainer.iterators.SerialIterator( test_dataset, args.batch_size, repeat=False, shuffle=False) updater = training.StandardUpdater(train_iter, optimizer, device=device) trainer = training.Trainer(updater, (args.epochs, 'epoch'), out=args.output_data_dir) # Create a multi node evaluator from a standard Chainer evaluator. evaluator = extensions.Evaluator(test_iter, model, device=device) evaluator = chainermn.create_multi_node_evaluator(evaluator, comm) trainer.extend(evaluator) # Some display and output extensions are necessary only for one worker. # (Otherwise, there would just be repeated outputs.) if comm.rank == 0: if extensions.PlotReport.available(): trainer.extend( extensions.PlotReport( ['main/loss', 'validation/main/loss'], 'epoch', file_name='loss.png')) trainer.extend( extensions.PlotReport( ['main/accuracy', 'validation/main/accuracy'],
def main():
    """Train an LSTM language model (RNNLM) on the Penn Tree Bank corpus.

    Uses truncated BPTT of length ``--bproplen``, reports train/validation
    perplexity during training, evaluates on the test split at the end,
    and serializes the final model to ``--model``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=20,
                        help='Number of examples in each mini-batch')
    parser.add_argument('--bproplen', '-l', type=int, default=35,
                        help='Number of words in each mini-batch '
                             '(= length of truncated BPTT)')
    parser.add_argument('--epoch', '-e', type=int, default=39,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--gradclip', '-c', type=float, default=5,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.set_defaults(test=False)
    parser.add_argument('--unit', '-u', type=int, default=650,
                        help='Number of LSTM units in each layer')
    parser.add_argument('--model', '-m', default='model.npz',
                        help='Model file name to serialize')
    args = parser.parse_args()

    # Load the Penn Tree Bank long word sequence dataset
    train, val, test = chainer.datasets.get_ptb_words()
    n_vocab = max(train) + 1  # train is just an array of integers
    print('#vocab = {}'.format(n_vocab))

    if args.test:
        # Tiny slices for a quick smoke test.
        train = train[:100]
        val = val[:100]
        test = test[:100]

    # Iterators that read the corpus at parallel offsets for BPTT.
    train_iter = ParallelSequentialIterator(train, args.batchsize)
    val_iter = ParallelSequentialIterator(val, 1, repeat=False)
    test_iter = ParallelSequentialIterator(test, 1, repeat=False)

    # Prepare an RNNLM model
    rnn = RNNForLM(n_vocab, args.unit)
    model = L.Classifier(rnn)
    model.compute_accuracy = False  # we only want the perplexity
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Set up an optimizer
    optimizer = chainer.optimizers.SGD(lr=1.0)
    optimizer.setup(model)
    optimizer.add_hook(
        chainer.optimizer_hooks.GradientClipping(args.gradclip))

    # Set up a trainer
    updater = BPTTUpdater(train_iter, optimizer, args.bproplen, args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # eval_model shares parameters with `model` but keeps its own RNN
    # state, so evaluation does not disturb the training state.
    eval_model = model.copy()  # Model with shared params and distinct states
    eval_rnn = eval_model.predictor
    trainer.extend(extensions.Evaluator(
        val_iter, eval_model, device=args.gpu,
        # Reset the RNN state at the beginning of each evaluation
        eval_hook=lambda _: eval_rnn.reset_state()))

    interval = 10 if args.test else 500
    trainer.extend(extensions.LogReport(postprocess=compute_perplexity,
                                        trigger=(interval, 'iteration')))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'perplexity', 'val_perplexity']
    ), trigger=(interval, 'iteration'))
    trainer.extend(extensions.ProgressBar(
        update_interval=1 if args.test else 10))
    trainer.extend(extensions.snapshot())
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'))
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()

    # Evaluate the final model
    print('test')
    eval_rnn.reset_state()  # fresh state before scoring the test split
    evaluator = extensions.Evaluator(test_iter, eval_model, device=args.gpu)
    result = evaluator()
    # Perplexity = exp(mean cross-entropy loss).
    print('test perplexity: {}'.format(np.exp(float(result['main/loss']))))

    # Serialize the final model
    chainer.serializers.save_npz(args.model, model)
def main():
    """Train the AEFINet (concat) frame-interpolation model on General100.

    Trains data-parallel on two GPUs via ``ParallelUpdater``, writes
    intermediate results under ``results/FI/AEFINet/<run-name>`` and the
    final model plus its constructor parameters under
    ``models/<run-name>``.

    Fix vs. previous revision: corrected the 'Dataet'/'dataet' typos in
    the user-facing status messages.
    """
    # Command-line arguments.
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.001,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=100,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu0', '-g', type=int, default=0,
                        help='GPU1 ID (negative value indicates CPU)')
    parser.add_argument('--gpu1', '-G', type=int, default=2,
                        help='GPU2 ID (negative value indicates CPU)')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--iter_parallel', '-p', action='store_true',
                        default=False, help='loading dataset from disk')
    parser.add_argument('--opt', '-o', type=str,
                        choices=('adam', 'sgd'), default='adam')
    parser.add_argument('--fsize', '-f', type=int, default=5)
    parser.add_argument('--ch', '-c', type=int, default=4)
    args = parser.parse_args()

    # Print the training parameters.
    print("-=Learning Parameter=-")
    print("# Max Epochs: {}".format(args.epoch))
    print("# Batch Size: {}".format(args.batchsize))
    print("# Learning Rate: {}".format(args.learnrate))
    print("# Optimizer Method: {}".format(args.opt))
    print("# Filter Size: {}".format(args.fsize))
    print("# Channel Scale: {}".format(args.ch))
    print('# Train Dataset: General 100')
    if args.iter_parallel:
        print("# Data Iters that loads in Parallel")
    print("\n")

    # Save directory; also dump the arguments for reproducibility.
    model_dir_name = 'AEFINet_concat_parallel_opt_{}_ch_{}_fsize_{}'.format(
        args.opt, args.ch, args.fsize)
    outdir = path.join(ROOT_PATH, 'results', 'FI', 'AEFINet', model_dir_name)
    if not path.exists(outdir):
        os.makedirs(outdir)
    with open(path.join(outdir, 'arg_param.txt'), 'w') as f:
        for k, v in args.__dict__.items():
            f.write('{}:{}\n'.format(k, v))

    # Load the dataset, either streamed from disk or held in memory.
    print('# loading dataset(General100_train, General100_test) ...')
    if args.iter_parallel:
        train = ds.SequenceDataset(dataset='train')
        test = ds.SequenceDataset(dataset='test')
    else:
        train = ds.SequenceDatasetOnMem(dataset='train')
        test = ds.SequenceDatasetOnMem(dataset='test')

    chainer.cuda.get_device_from_id(args.gpu0).use()

    # Prepare model; ParallelUpdater distributes copies to the devices,
    # so no explicit model.to_gpu() is needed here.
    model = N.GenEvaluator(N.AEFINetConcat(f_size=args.fsize, ch=args.ch))

    # Setup optimizer (argparse restricts --opt to 'adam' or 'sgd').
    if args.opt == 'adam':
        optimizer = chainer.optimizers.Adam(alpha=args.learnrate)
    elif args.opt == 'sgd':
        optimizer = chainer.optimizers.MomentumSGD(
            lr=args.learnrate, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001))

    # Setup iterators.
    if args.iter_parallel:
        train_iter = chainer.iterators.MultiprocessIterator(
            train, args.batchsize, n_processes=8)
        test_iter = chainer.iterators.MultiprocessIterator(
            test, args.batchsize, repeat=False, shuffle=False, n_processes=8)
    else:
        train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
        test_iter = chainer.iterators.SerialIterator(
            test, args.batchsize, repeat=False, shuffle=False)

    # Setup trainer: data-parallel training over the two GPUs.
    updater = training.ParallelUpdater(
        train_iter,
        optimizer,
        devices={
            'main': args.gpu0,
            'second': args.gpu1
        },
    )
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=outdir)

    # Eval test data.
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu0))
    # Dump loss graph.
    trainer.extend(extensions.dump_graph('main/loss'))
    # LR shift: decay 10x every 100 epochs (hyper-param name depends on
    # the optimizer: 'lr' for SGD, 'alpha' for Adam).
    if args.opt == 'sgd':
        trainer.extend(extensions.ExponentialShift("lr", 0.1),
                       trigger=(100, 'epoch'))
    elif args.opt == 'adam':
        trainer.extend(extensions.ExponentialShift("alpha", 0.1),
                       trigger=(100, 'epoch'))
    # Save snapshots every 10 epochs.
    trainer.extend(extensions.snapshot(), trigger=(10, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_snapshot_{.updater.epoch}'), trigger=(10, 'epoch'))
    # Log report.
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.observe_lr(), trigger=(1, 'epoch'))
    # Plot loss graph.
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              'epoch', file_name='loss.png'))
    # Plot PSNR graph.
    trainer.extend(
        extensions.PlotReport(['main/PSNR', 'validation/main/PSNR'],
                              'epoch', file_name='PSNR.png'))
    # Print info.
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/PSNR',
            'validation/main/PSNR', 'lr', 'elapsed_time'
        ]))
    # Print progress bar.
    trainer.extend(extensions.ProgressBar())
    # [ChainerUI] enable to send commands from ChainerUI
    trainer.extend(CommandsExtension())
    # [ChainerUI] save 'args' to show experimental conditions
    save_args(args, outdir)

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()

    # Save the final model and its constructor parameters.
    model_outdir = path.join(ROOT_PATH, 'models', model_dir_name)
    if not path.exists(model_outdir):
        os.makedirs(model_outdir)
    model_name = 'AEFINet_concat_opt_{}_ch_{}_fsize_{}.npz'.format(
        args.opt, args.ch, args.fsize)
    chainer.serializers.save_npz(path.join(model_outdir, model_name), model)

    model_parameter = {
        'name': 'AEFINetConcat',
        'parameter': {
            'f_size': args.fsize,
            'ch': args.ch
        }
    }
    with open(path.join(model_outdir, 'model_parameter.json'), 'w') as f:
        json.dump(model_parameter, f)
    def setup_trainer(self):
        """Build ``self.trainer`` from the previously prepared pieces.

        Expects ``self.train_iterator``, ``self.val_iterator``,
        ``self.optimizer``, ``self.model`` and the various
        ``*_interval`` / ``*_interval_type`` settings to already be set;
        wires up evaluation, best-model snapshotting, logging, plotting
        and parameter dumps.
        """
        self.updater = chainer.training.updater.StandardUpdater(
            self.train_iterator, self.optimizer, device=self.gpu)
        self.trainer = chainer.training.Trainer(
            self.updater, (self.max_epoch, 'epoch'), out=self.out_dir)
        self.trainer.extend(
            extensions.Evaluator(
                self.val_iterator, self.model, device=self.gpu),
            trigger=(self.eval_interval, self.eval_interval_type))

        # Save snapshot — only when the validation loss reaches a new
        # minimum, so 'model_snapshot.npz' always holds the best model.
        self.trainer.extend(
            extensions.snapshot_object(
                self.model, savefun=S.save_npz,
                filename='model_snapshot.npz'),
            trigger=chainer.training.triggers.MinValueTrigger(
                'validation/main/loss',
                (self.save_interval, self.save_interval_type)))

        # Dump network architecture
        self.trainer.extend(
            extensions.dump_graph(
                root_name='main/loss',
                out_name='network_architecture.dot'))

        # Logging
        self.trainer.extend(
            extensions.ProgressBar(
                update_interval=self.progressbar_update_interval))
        self.trainer.extend(
            extensions.observe_lr(),
            trigger=(self.log_interval, self.log_interval_type))
        self.trainer.extend(
            extensions.LogReport(
                log_name='log.json',
                trigger=(self.log_interval, self.log_interval_type)))
        self.trainer.extend(
            extensions.PrintReport([
                'iteration',
                'epoch',
                'elapsed_time',
                'lr',
                'main/loss',
                'validation/main/loss',
            ]), trigger=(self.print_interval, self.print_interval_type))

        # Plot train/validation loss curves.
        self.trainer.extend(
            extensions.PlotReport(
                [
                    'main/loss',
                    'validation/main/loss',
                ],
                file_name='loss_plot.png',
                x_key=self.plot_interval_type,
                trigger=(self.plot_interval, self.plot_interval_type)),
            trigger=(self.plot_interval, self.plot_interval_type))

        # Dump params — experiment conditions for reproducibility.
        params = dict()
        params['model_name'] = self.model_name
        params['train_dataset_dir'] = self.train_dataset_dir
        params['val_dataset_dir'] = self.val_dataset_dir
        params['class_names'] = self.train_dataset.class_names
        params['timestamp'] = self.timestamp_iso
        params['out_dir'] = self.out_dir
        params['gpu'] = self.gpu
        params['batch_size'] = self.batch_size
        params['max_epoch'] = self.max_epoch
        params['lr'] = self.lr
        params['weight_decay'] = self.weight_decay
        self.trainer.extend(
            fcn.extensions.ParamsReport(params, file_name='params.yaml'))

        # Dump param for fcn_object_segmentation.py
        model_name = dict()
        model_name['model_name'] = self.model_name
        self.trainer.extend(
            fcn.extensions.ParamsReport(
                model_name, file_name='model_name.yaml'))
        target_names = dict()
        target_names['target_names'] = self.train_dataset.class_names
        self.trainer.extend(
            fcn.extensions.ParamsReport(
                target_names, file_name='target_names.yaml'))
def main():
    """Train an ImageNet (ILSVRC2012) convnet (classic GPU/CPU version).

    Fix vs. previous revision: the ``--gpu`` help text was missing its
    closing parenthesis.
    """
    # Architecture name -> model class (fp16 variants included).
    archs = {
        'alex': alex.Alex,
        'alex_fp16': alex.AlexFp16,
        'googlenet': googlenet.GoogLeNet,
        'googlenetbn': googlenetbn.GoogLeNetBN,
        'googlenetbn_fp16': googlenetbn.GoogLeNetBNFp16,
        'nin': nin.NIN,
        'resnet50': resnet50.ResNet50
    }

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to training image-label list file')
    parser.add_argument('val', help='Path to validation image-label list file')
    parser.add_argument('--arch', '-a', choices=archs.keys(), default='nin',
                        help='Convnet architecture')
    parser.add_argument('--batchsize', '-B', type=int, default=32,
                        help='Learning minibatch size')
    parser.add_argument('--epoch', '-E', type=int, default=10,
                        help='Number of epochs to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--initmodel',
                        help='Initialize the model from given file')
    parser.add_argument('--loaderjob', '-j', type=int,
                        help='Number of parallel data loading processes')
    parser.add_argument('--mean', '-m', default='mean.npy',
                        help='Mean file (computed by compute_mean.py)')
    parser.add_argument('--resume', '-r', default='',
                        help='Initialize the trainer from given file')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--root', '-R', default='.',
                        help='Root directory path of image files')
    parser.add_argument('--val_batchsize', '-b', type=int, default=250,
                        help='Validation minibatch size')
    parser.add_argument('--test', action='store_true')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    # Initialize the model to train.
    model = archs[args.arch]()
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()  # Make the GPU current
        model.to_gpu()

    # Load the datasets and mean file.
    mean = np.load(args.mean)
    train = PreprocessedDataset(args.train, args.root, mean, model.insize)
    val = PreprocessedDataset(args.val, args.root, mean, model.insize, False)
    # These iterators load the images with subprocesses running in
    # parallel to the training/validation.
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, n_processes=args.loaderjob)
    val_iter = chainer.iterators.MultiprocessIterator(
        val, args.val_batchsize, repeat=False, n_processes=args.loaderjob)

    # Set up an optimizer.
    optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer.setup(model)

    # Set up a trainer.
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    # Tight intervals under --test so a smoke run exercises everything.
    val_interval = (1 if args.test else 100000), 'iteration'
    log_interval = (1 if args.test else 1000), 'iteration'

    trainer.extend(extensions.Evaluator(val_iter, model, device=args.gpu),
                   trigger=val_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    # Be careful to pass the interval directly to LogReport
    # (it determines when to emit log rather than when to read observations)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'lr'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Word2vec training (skipgram/CBOW) on PTB with paired data.

    Uses ``w2v_mi.get_pair`` to rewrite the corpora/vocabulary (adding
    original-index bookkeeping) and writes the learned embeddings in the
    textual word2vec format to 'word2vec.model'.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', default=-1, type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--unit', '-u', default=300, type=int,
                        help='number of units')
    parser.add_argument('--window', '-w', default=5, type=int,
                        help='window size')
    parser.add_argument('--batchsize', '-b', type=int, default=1000,
                        help='learning minibatch size')
    parser.add_argument('--epoch', '-e', default=20, type=int,
                        help='number of epochs to learn')
    parser.add_argument('--model', '-m', choices=['skipgram', 'cbow'],
                        default='cbow',
                        help='model type ("skipgram", "cbow")')
    parser.add_argument('--negative-size', default=5, type=int,
                        help='number of negative samples')
    parser.add_argument('--out-type', '-o', choices=['hsm', 'ns', 'original'],
                        default='hsm',
                        help='output model type ("hsm": hierarchical softmax, '
                             '"ns": negative sampling, "original": '
                             'no approximation)')
    parser.add_argument('--out', default='result',
                        help='Directory to output the result')
    parser.add_argument('--test', dest='test', action='store_true')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        cuda.check_cuda_available()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('Window: {}'.format(args.window))
    print('Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('Training model: {}'.format(args.model))
    print('Output type: {}'.format(args.out_type))
    print('')

    if args.gpu >= 0:
        # NOTE(review): redundant — the device was already made current above.
        cuda.get_device_from_id(args.gpu).use()

    # Load the dataset
    print('load_start')
    train, val, _ = chainer.datasets.get_ptb_words()
    # NOTE(review): 'loar_finish' looks like a typo for 'load_finish'.
    print('loar_finish')
    vocab = chainer.datasets.get_ptb_words_vocabulary()
    # Rewrites corpora/vocab and returns pairing metadata
    # (original_index, ori_con_data) consumed by the model and iterator;
    # exact semantics live in w2v_mi.get_pair.
    train, val, _, vocab, original_index, ori_con_data = w2v_mi.get_pair(
        train, val, _, vocab)
    # Word frequencies over train+val, needed by the HSM/NS loss layers.
    counts = collections.Counter(train)
    counts.update(collections.Counter(val))
    n_vocab = max(train) + 1

    if args.test:
        # Tiny slices for a quick smoke test.
        train = train[:100]
        val = val[:100]

    index2word = {wid: word for word, wid in six.iteritems(vocab)}

    print('n_vocab: %d' % n_vocab)
    print('data length: %d' % len(train))

    # Choose the output (loss) layer.
    if args.out_type == 'hsm':
        HSM = L.BinaryHierarchicalSoftmax
        tree = HSM.create_huffman_tree(counts)
        loss_func = HSM(args.unit, tree)
        loss_func.W.data[...] = 0
    elif args.out_type == 'ns':
        cs = [counts[w] for w in range(len(counts))]
        loss_func = L.NegativeSampling(args.unit, cs, args.negative_size)
        loss_func.W.data[...] = 0
    elif args.out_type == 'original':
        loss_func = SoftmaxCrossEntropyLoss(args.unit, n_vocab)
    else:
        raise Exception('Unknown output type: {}'.format(args.out_type))

    # Choose the model
    if args.model == 'skipgram':
        model = SkipGram(n_vocab, args.unit, loss_func, ori_con_data)
    elif args.model == 'cbow':
        print('chose_model cbow')
        model = ContinuousBoW(n_vocab, args.unit, loss_func, ori_con_data)
    else:
        raise Exception('Unknown model type: {}'.format(args.model))

    if args.gpu >= 0:
        model.to_gpu()

    # Set up an optimizer
    optimizer = O.Adam()
    optimizer.setup(model)

    # Set up an iterator
    train_iter = WindowIterator(train, args.window, args.batchsize,
                                original_index)
    val_iter = WindowIterator(val, args.window, args.batchsize,
                              original_index, repeat=False)

    # Set up an updater
    updater = training.StandardUpdater(train_iter, optimizer,
                                       converter=convert, device=args.gpu)

    # Set up a trainer
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(
        extensions.Evaluator(val_iter, model, converter=convert,
                             device=args.gpu))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport(['epoch', 'main/loss',
                                'validation/main/loss']))
    trainer.extend(extensions.ProgressBar())
    print('run_start')
    trainer.run()

    # Save the word2vec model in the textual word2vec format.
    # The header count and the skipped last row drop exactly one entry —
    # presumably a synthetic token appended by get_pair; verify against
    # w2v_mi.get_pair before relying on this.
    with open('word2vec.model', 'w') as f:
        f.write('%d %d\n' % (len(index2word) - 1, args.unit))
        w = cuda.to_cpu(model.embed.W.data)
        for i, wi in enumerate(w):
            if i == len(index2word) - 1:
                print(i)
                continue
            v = ' '.join(map(str, wi))
            f.write('%s %s\n' % (index2word[i], v))
def main():
    """Train a Chainer text classifier (CNN/LSTM/BOW/GRU encoder) from a CSV-style dataset.

    Relies on module-level helpers defined elsewhere in the project:
    `get_input_dataset`, `nets`, `convert_seq2`, `triggers`.
    Side effects: writes `vocab.json`, `args.json`, and the best model
    snapshot under ``--out``; prints progress to stdout.
    """
    start = time.time()
    current_datetime = '{}'.format(datetime.datetime.today())
    parser = argparse.ArgumentParser(description='Chainer Text Classification')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=30,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--unit', '-u', type=int, default=200,
                        help='Number of units')
    parser.add_argument('--vocab', '-v', type=int, default=100000,
                        help='Number of max vocabulary')
    parser.add_argument('--layer', '-l', type=int, default=1,
                        help='Number of layers of RNN or MLP following CNN')
    parser.add_argument('--dropout', '-d', type=float, default=0.4,
                        help='Dropout rate')
    parser.add_argument('--dataset', '-dataset', required=True,
                        help='train dataset')
    parser.add_argument('--size', '-size', type=int, default=-1,
                        help='train dataset size -> def train:3/4, test:1/4')
    parser.add_argument('--model', '-model', default='cnn',
                        choices=['cnn', 'lstm', 'bow', 'gru'],
                        help='Name of encoder model type.')
    parser.add_argument('--early-stop', action='store_true',
                        help='use early stopping method')
    parser.add_argument('--same-network', action='store_true',
                        help='use same network between i1 and i2')
    parser.add_argument('--save-init', action='store_true',
                        help='save init model')
    parser.add_argument('--char-based', action='store_true')
    args = parser.parse_args()
    print(json.dumps(args.__dict__, indent=2))

    # Load train/dev splits and the vocabulary in one call.
    # NOTE(review): `get_input_dataset` is project-local; presumably the last
    # column of each train record is the integer class label (see n_class) —
    # confirm against its definition.
    train, test, vocab = get_input_dataset(args.dataset, vocab=None,
                                           max_vocab_size=args.vocab)
    print('# train data: {}'.format(len(train)))
    print('# dev data: {}'.format(len(test)))
    print('# vocab: {}'.format(len(vocab)))
    # Number of distinct labels, taken from the last element of each record.
    n_class = len(set([int(d[-1]) for d in train]))
    print('# class: {}'.format(n_class))
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Setup a model — encoder type is chosen by the --model flag
    # (argparse `choices` guarantees one of the four branches is taken).
    if args.model == 'lstm':
        Encoder = nets.LSTMEncoder
    elif args.model == 'cnn':
        Encoder = nets.CNNEncoder
    elif args.model == 'bow':
        Encoder = nets.BOWMLPEncoder
    elif args.model == 'gru':
        Encoder = nets.GRUEncoder
    encoder = Encoder(n_layers=args.layer, n_vocab=len(vocab),
                      n_units=args.unit, dropout=args.dropout,
                      same_network=args.same_network)
    model = nets.TextClassifier(encoder, n_class)
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer (Adam with mild L2 weight decay)
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(1e-4))

    # Set up a trainer
    updater = training.updaters.StandardUpdater(train_iter, optimizer,
                                                converter=convert_seq2,
                                                device=args.gpu)
    # early Stopping: stop when validation loss stops improving, otherwise
    # train for a fixed number of epochs.
    if args.early_stop:
        stop_trigger = triggers.EarlyStoppingTrigger(
            monitor='validation/main/loss', max_trigger=(args.epoch, 'epoch'))
    else:
        stop_trigger = (args.epoch, 'epoch')
    trainer = training.Trainer(updater, stop_trigger, out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(
        extensions.Evaluator(test_iter, model, converter=convert_seq2,
                             device=args.gpu))

    # Take a best snapshot (by max validation accuracy, checked each epoch)
    record_trigger = training.triggers.MaxValueTrigger(
        'validation/main/accuracy', (1, 'epoch'))
    trainer.extend(extensions.snapshot_object(model, 'best_model.npz'),
                   trigger=record_trigger)

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))

    # Print a progress bar to stdout
    # trainer.extend(extensions.ProgressBar())

    # Save vocabulary and model's setting.
    # NOTE: model_setup aliases args.__dict__, so the keys added below
    # also end up in the args.json dump — this appears intentional.
    if not os.path.isdir(args.out):
        os.mkdir(args.out)
    vocab_path = os.path.join(args.out, 'vocab.json')
    with open(vocab_path, 'w') as f:
        json.dump(vocab, f)
    model_path = os.path.join(args.out, 'best_model.npz')
    model_setup = args.__dict__
    model_setup['vocab_path'] = vocab_path
    model_setup['model_path'] = model_path
    model_setup['n_class'] = n_class
    model_setup['datetime'] = current_datetime
    with open(os.path.join(args.out, 'args.json'), 'w') as f:
        json.dump(args.__dict__, f)
    if args.save_init:
        # Only dump the freshly initialized weights, then quit without training.
        chainer.serializers.save_npz(os.path.join(args.out, 'init_model.npz'),
                                     model)
        exit()

    # Run the training
    print('Start trainer.run: {}'.format(current_datetime))
    trainer.run()
    print('Elapsed_time: {}'.format(
        datetime.timedelta(seconds=time.time() - start)))
def main():
    """Train (or run prediction with) a pretrained-ResNet classifier/regressor.

    Reads train/validation CSVs via the project-local `Dataset`, builds an
    `L.Classifier(Resnet(args))`, trains with the selected optimizer, then
    always runs a prediction pass over the validation set, writing
    `result.txt` under the timestamped output directory.

    Fixes vs. the original:
    - `if not args.gpu:` also fired for an explicit `--gpu 0` (default is
      None), silently re-deciding the device; now only auto-selects when
      the flag was not given.
    - a second, redundant `LogReport` registration was removed (duplicate
      extension names clash in the trainer).
    """
    # command line argument parsing
    parser = argparse.ArgumentParser(
        description='Multi-Perceptron classifier/regressor')
    parser.add_argument('train', help='Path to csv file')
    parser.add_argument('--root', '-R', default="betti",
                        help='Path to image files')
    parser.add_argument('--val', help='Path to validation csv file',
                        required=True)
    parser.add_argument('--regress', '-r', action='store_true',
                        help='set for regression, otherwise classification')
    parser.add_argument('--time_series', '-ts', action='store_true',
                        help='set for time series data')
    parser.add_argument('--batchsize', '-b', type=int, default=10,
                        help='Number of samples in each mini-batch')
    parser.add_argument('--layer', '-l', type=str, choices=['res5', 'pool5'],
                        default='pool5',
                        help='output layer of the pretrained ResNet')
    parser.add_argument('--fch', type=int, nargs="*", default=[],
                        help='numbers of channels for the last fc layers')
    parser.add_argument('--cols', '-c', type=int, nargs="*", default=[1],
                        help='column indices in csv of target variables')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--snapshot', '-s', type=int, default=100,
                        help='snapshot interval')
    parser.add_argument('--initmodel', '-i',
                        help='Initialize the model from given file')
    parser.add_argument('--random', '-rt', type=int, default=1,
                        help='random translation')
    parser.add_argument('--gpu', '-g', type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--loaderjob', '-j', type=int, default=3,
                        help='Number of parallel data loading processes')
    parser.add_argument('--outdir', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--optimizer', '-op', choices=optim.keys(),
                        default='Adam', help='optimizer')
    parser.add_argument('--resume', type=str, default=None,
                        help='Resume the training from snapshot')
    parser.add_argument('--predict', '-p', action='store_true',
                        help='prediction with a specified model')
    parser.add_argument('--tuning_rate', '-tr', type=float, default=0.1,
                        help='learning rate for pretrained layers')
    parser.add_argument('--dropout', '-dr', type=float, default=0,
                        help='dropout ratio for the FC layers')
    parser.add_argument('--cw', '-cw', type=int, default=128,
                        help='crop image width')
    parser.add_argument('--ch', '-ch', type=int, default=128,
                        help='crop image height')
    parser.add_argument('--weight_decay', '-w', type=float, default=1e-6,
                        help='weight decay for regularization')
    parser.add_argument('--wd_norm', '-wn', choices=['none', 'l1', 'l2'],
                        default='l2',
                        help='norm of weight decay for regularization')
    parser.add_argument('--dtype', '-dt', choices=dtypes.keys(),
                        default='fp32', help='floating point precision')
    args = parser.parse_args()
    # Results go into a per-run, timestamped subdirectory.
    args.outdir = os.path.join(args.outdir, dt.now().strftime('%m%d_%H%M'))

    # Enable autotuner of cuDNN
    chainer.config.autotune = True
    chainer.config.dtype = dtypes[args.dtype]
    chainer.print_runtime_info()

    # read csv file
    train = Dataset(args.root, args.train, cw=args.cw, ch=args.ch,
                    random=args.random, regression=args.regress,
                    time_series=args.time_series, cols=args.cols)
    test = Dataset(args.root, args.val, cw=args.cw, ch=args.ch,
                   regression=args.regress, time_series=args.time_series,
                   cols=args.cols)

    # BUG FIX: was `if not args.gpu:`, which also triggered when the user
    # explicitly requested `--gpu 0`. Auto-select only when unset.
    if args.gpu is None:
        if chainer.cuda.available:
            args.gpu = 0
        else:
            args.gpu = -1
    print(args)
    save_args(args, args.outdir)

    # Loss/accuracy functions depend on the task type.
    if args.regress:
        accfun = F.mean_absolute_error
        lossfun = F.mean_squared_error
        args.chs = len(args.cols)
    else:
        accfun = F.accuracy
        lossfun = F.softmax_cross_entropy
        args.chs = max(train.chs, test.chs)
        if len(args.cols) > 1:
            print("\n\nClassification only works with a single target.\n\n")
            exit()

    # Set up a neural network to train
    model = L.Classifier(Resnet(args), lossfun=lossfun, accfun=accfun)

    # Set up an optimizer (with optional L1/L2 weight decay)
    optimizer = optim[args.optimizer]()
    optimizer.setup(model)
    if args.weight_decay > 0:
        if args.wd_norm == 'l2':
            optimizer.add_hook(
                chainer.optimizer.WeightDecay(args.weight_decay))
        elif args.wd_norm == 'l1':
            optimizer.add_hook(
                chainer.optimizer_hooks.Lasso(args.weight_decay))

    # slow update for pretrained layers: scale down Adam's alpha on the
    # pretrained base so new FC layers learn faster than the backbone.
    if args.optimizer in ['Adam']:
        for func_name in model.predictor.base._children:
            for param in model.predictor.base[func_name].params():
                param.update_rule.hyperparam.alpha *= args.tuning_rate

    if args.initmodel:
        print('Load model from: ', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    # select numpy or cupy
    xp = chainer.cuda.cupy if args.gpu >= 0 else np

    # Multithreaded iterators keep image loading off the training thread.
    train_iter = iterators.MultithreadIterator(train, args.batchsize,
                                               shuffle=True,
                                               n_threads=args.loaderjob)
    test_iter = iterators.MultithreadIterator(test, args.batchsize,
                                              repeat=False, shuffle=False,
                                              n_threads=args.loaderjob)

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'),
                               out=args.outdir)
    frequency = args.epoch if args.snapshot == -1 else max(1, args.snapshot)
    log_interval = 1, 'epoch'
    val_interval = 20, 'epoch'
    trainer.extend(
        extensions.snapshot_object(model, 'model_epoch_{.updater.epoch}'),
        trigger=(frequency, 'epoch'))
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu),
                   trigger=val_interval)
    # LR schedule: halve every epoch/5 epochs; the hyperparameter name
    # differs between SGD-family ('lr') and Adam ('alpha').
    if args.optimizer in ['Momentum', 'AdaGrad', 'RMSprop']:
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.ExponentialShift('lr', 0.5),
                       trigger=(args.epoch / 5, 'epoch'))
    elif args.optimizer in ['Adam']:
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.ExponentialShift("alpha", 0.5, optimizer=optimizer),
            trigger=(args.epoch / 5, 'epoch'))
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'], 'epoch',
                file_name='accuracy.png'))
    trainer.extend(extensions.PrintReport([
        'epoch', 'main/loss', 'main/accuracy', 'validation/main/loss',
        'validation/main/accuracy', 'elapsed_time', 'lr'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)
    # NOTE: the original registered a second LogReport here (left over from
    # a commented-out ChainerUI CommandsExtension); removed as redundant.
    if not args.predict:
        trainer.run()

    ## prediction over the validation set (always runs, even after training)
    print("predicting: {} entries...".format(len(test)))
    test_iter = iterators.SerialIterator(test, args.batchsize,
                                         repeat=False, shuffle=False)
    converter = concat_examples
    idx = 0
    with open(os.path.join(args.outdir, 'result.txt'), 'w') as output:
        for batch in test_iter:
            x, t = converter(batch, device=args.gpu)
            with chainer.using_config('train', False):
                with chainer.function.no_backprop_mode():
                    if args.regress:
                        y = model.predictor(x).data
                        if args.gpu > -1:
                            y = xp.asnumpy(y)
                            t = xp.asnumpy(t)
                        # De-normalize back to the original target scale.
                        # assumes Dataset exposes per-dataset std/mean — TODO confirm
                        y = y * test.std + test.mean
                        t = t * test.std + test.mean
                    else:
                        y = F.softmax(model.predictor(x)).data
                        if args.gpu > -1:
                            y = xp.asnumpy(y)
                            t = xp.asnumpy(t)
            # One CSV row per sample: id, target(s), prediction(s).
            for i in range(y.shape[0]):
                output.write(os.path.basename(test.ids[idx]))
                if len(t.shape) > 1:
                    for j in range(t.shape[1]):
                        output.write(",{}".format(t[i, j]))
                        output.write(",{}".format(y[i, j]))
                else:
                    output.write(",{}".format(t[i]))
                    output.write(",{}".format(np.argmax(y[i, :])))
                    for yy in y[i]:
                        output.write(",{0:1.5f}".format(yy))
                output.write("\n")
                idx += 1
def main():
    """Train a face-classification CNN over per-person image directories.

    Each subdirectory of ``--path`` becomes one class; the mapping
    directory-name -> label is written to ``whoiswho.txt``. Optionally
    fine-tunes from a pickled Caffe AlexNet model.

    Fixes vs. the original:
    - `--channel` lacked `type=int`, so `-c 1` passed the string "1" into
      `image2TrainAndTest`.
    - files are now opened with `with` (the pickle file handle leaked).
    """
    parse = argparse.ArgumentParser(description='face detection train')
    parse.add_argument('--batchsize', '-b', type=int, default=100,
                       help='Number if images in each mini batch')
    parse.add_argument('--epoch', '-e', type=int, default=20,
                       help='Number of sweeps over the dataset to train')
    parse.add_argument('--gpu', '-g', type=int, default=-1,
                       help='GPU ID(negative value indicates CPU')
    parse.add_argument('--out', '-o', default='result',
                       help='Directory to output the result')
    parse.add_argument('--resume', '-r', default='',
                       help='Resume the training from snapshot')
    parse.add_argument('--unit', '-u', type=int, default=1000,
                       help='Number of units')
    parse.add_argument('--model', '-m', default='')
    parse.add_argument('--optimizer', '-O', default='')
    parse.add_argument('--size', '-s', type=int, default=128,
                       help='image size')
    parse.add_argument('--path', '-p', default='')
    # BUG FIX: `type=int` was missing, so a user-supplied `-c 1` arrived as
    # the string "1" while the default stayed an int.
    parse.add_argument('--channel', '-c', type=int, default=3)
    parse.add_argument('--caffemodelpath', '-cmp', default="")
    args = parse.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Enumerate class directories; record (dir, label) pairs and dump the
    # human-readable label mapping to whoiswho.txt.
    pathsAndLabels = []
    label_i = 0
    data_list = glob.glob(args.path + "*")
    print(data_list)
    with open("whoiswho.txt", "w") as datatxt:
        for datafinderName in data_list:
            pathsAndLabels.append(
                np.asarray([datafinderName + "/", label_i]))
            # Extract the last path component (text after the final '/').
            pattern = r".*/(.*)"
            directoryname = ""
            for a in re.finditer(pattern, datafinderName):
                directoryname += a.groups()[0]
            datatxt.write(directoryname + "," + str(label_i) + "\n")
            label_i = label_i + 1

    train, test = image2TrainAndTest(pathsAndLabels, channels=args.channel)

    if args.caffemodelpath == '':
        print("no fine tuning")
        model = L.Classifier(alexLike.AlexLike(len(pathsAndLabels)))
    else:
        print("fine tuning using ", args.caffemodelpath)
        model = L.Classifier(alexLike.FromCaffeAlexnet(len(pathsAndLabels)))
        # SECURITY NOTE: pickle.load executes arbitrary code — only load
        # caffemodel pickles from trusted sources.
        with open(args.caffemodelpath, "rb") as f:
            original_model = pickle.load(f)
        copy_model(original_model, model)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    # Resume model/optimizer state only when both snapshot paths are given.
    if args.model != '' and args.optimizer != '':
        chainer.serializers.load_npz(args.model, model)
        chainer.serializers.load_npz(args.optimizer, optimizer)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy'
        ]))
    trainer.extend(extensions.ProgressBar())
    trainer.run()

    # Persist final weights and optimizer state, named by class count.
    outputname = "my_output_" + str(len(pathsAndLabels))
    modelOutName = outputname + ".model"
    OptimOutName = outputname + ".state"
    chainer.serializers.save_npz(modelOutName, model)
    chainer.serializers.save_npz(OptimOutName, optimizer)
def main():
    """Train a VGG image classifier on CIFAR-10 or CIFAR-100.

    Command-line flags select the dataset, optimizer hyperparameters, and
    output directory. The best model (lowest validation loss) is snapshotted
    as 'model_best'; loss/accuracy plots and a log are written under --out.
    """
    cli = argparse.ArgumentParser(description='Chainer CIFAR example:')
    cli.add_argument('--dataset', '-d', default='cifar10',
                     help='The dataset to use: cifar10 or cifar100')
    cli.add_argument('--batchsize', '-b', type=int, default=64,
                     help='Number of images in each mini-batch')
    cli.add_argument('--learnrate', '-l', type=float, default=0.05,
                     help='Learning rate for SGD')
    cli.add_argument('--epoch', '-e', type=int, default=300,
                     help='Number of sweeps over the dataset to train')
    cli.add_argument('--gpu', '-g', type=int, default=0,
                     help='GPU ID (negative value indicates CPU)')
    cli.add_argument('--out', '-o', default='result',
                     help='Directory to output the result')
    cli.add_argument('--resume', '-r', default='',
                     help='Resume the training from snapshot')
    opts = cli.parse_args()

    print('GPU: {}'.format(opts.gpu))
    print('# Minibatch-size: {}'.format(opts.batchsize))
    print('# epoch: {}'.format(opts.epoch))
    print('')

    # Pick the dataset. The Classifier link reports softmax cross entropy
    # loss and accuracy every iteration, consumed by PrintReport below.
    if opts.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        n_labels = 10
        train, test = get_cifar10()
    elif opts.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        n_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    net = L.Classifier(models.VGG.VGG(n_labels))
    if opts.gpu >= 0:
        # Activate the requested device and move the model onto it.
        chainer.cuda.get_device_from_id(opts.gpu).use()
        net.to_gpu()

    sgd = chainer.optimizers.MomentumSGD(opts.learnrate)
    sgd.setup(net)
    sgd.add_hook(chainer.optimizer.WeightDecay(5e-4))

    iter_train = chainer.iterators.SerialIterator(train, opts.batchsize)
    iter_test = chainer.iterators.SerialIterator(test, opts.batchsize,
                                                 repeat=False, shuffle=False)

    # Assemble the training loop.
    upd = training.StandardUpdater(iter_train, sgd, device=opts.gpu)
    loop = training.Trainer(upd, (opts.epoch, 'epoch'), out=opts.out)

    # Validation after every epoch.
    loop.extend(extensions.Evaluator(iter_test, net, device=opts.gpu))
    # Halve the learning rate every 25 epochs.
    loop.extend(extensions.ExponentialShift('lr', 0.5), trigger=(25, 'epoch'))
    # Computational graph dumped from 'loss' at the first iteration;
    # "main" is the target link of the "main" optimizer.
    loop.extend(extensions.dump_graph('main/loss'))
    # One snapshot at the final epoch, plus a rolling best-model snapshot.
    loop.extend(extensions.snapshot(), trigger=(opts.epoch, 'epoch'))
    loop.extend(extensions.snapshot_object(net, 'model_best'),
                trigger=MinValueTrigger('validation/main/loss'))
    # Per-epoch statistics log.
    loop.extend(extensions.LogReport())
    if extensions.PlotReport.available():
        loop.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch', file_name='loss.png'))
        loop.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch', file_name='accuracy.png'))
    # Console report; "validation" is the Evaluator's default prefix.
    loop.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))
    loop.extend(extensions.ProgressBar())

    if opts.resume:
        # Restore the whole trainer state from a snapshot.
        chainer.serializers.load_npz(opts.resume, loop)

    loop.run()
def main():
    """Train a GAT or GCN node classifier on a citation graph (cora/pubmed/citeseer).

    Full-batch training: each iterator serves all train/val node indices at
    once. Stops via EarlyStoppingTrigger on validation loss; evaluates test
    accuracy at the end. Saves/loads `best_model.npz` under --out.

    Fix vs. the original: `args.resume != None` replaced with the idiomatic
    identity test `is not None`.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--resume', '-m', type=str, default=None)
    parser.add_argument('--model', type=str, default='gat',
                        choices=['gat', 'gcn'])
    parser.add_argument('--dataset', type=str, default='cora',
                        choices=['cora', 'pubmed', 'citeseer'])
    parser.add_argument('--lr', type=float, default=0.005,
                        help='Learning rate')
    parser.add_argument('--epoch', '-e', type=int, default=5000,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--unit', '-u', type=int, default=8,
                        help='Number of units')
    parser.add_argument('--dropout', '-d', type=float, default=0.5,
                        help='Dropout rate')
    parser.add_argument('--weight-decay', type=float, default=5e-4)
    parser.add_argument('--validation-interval', type=int, default=1,
                        help='Number of updates before running validation')
    parser.add_argument('--early-stopping', action='store_true',
                        help='Enable early stopping.')
    parser.add_argument(
        '--normalization', default='gcn', choices=['pygcn', 'gcn'],
        help='Variant of adjacency matrix normalization method to use')
    args = parser.parse_args()

    print("Loading data")
    adj, features, labels, idx_train, idx_val, idx_test = load_data(
        args.dataset, normalization=args.normalization)
    # Full-batch iterators: one batch holds every train/val node index.
    train_iter = chainer.iterators.SerialIterator(
        idx_train, batch_size=len(idx_train), shuffle=False)
    dev_iter = chainer.iterators.SerialIterator(
        idx_val, batch_size=len(idx_val), repeat=False, shuffle=False)

    # Set up a neural network to train.
    print("Building model %s" % args.model)
    model_cls = GAT if args.model == 'gat' else GCN
    model = model_cls(adj, features, labels, args.unit, dropout=args.dropout)
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.Adam(alpha=args.lr)
    optimizer.setup(model)
    if args.weight_decay > 0.:
        optimizer.add_hook(
            chainer.optimizer_hooks.WeightDecay(args.weight_decay))
    # FIX: identity comparison with None (was `!= None`).
    if args.resume is not None:
        print("Loading model from " + args.resume)
        chainer.serializers.load_npz(args.resume, model)

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    # Stop when validation loss stalls; `patients` is Chainer's actual
    # (misspelled) keyword for patience.
    trigger = training.triggers.EarlyStoppingTrigger(
        monitor='validation/main/loss', patients=100,
        check_trigger=(args.validation_interval, 'epoch'),
        max_trigger=(args.epoch, 'epoch'))
    trainer = training.Trainer(updater, trigger, out=args.out)
    trainer.extend(extensions.Evaluator(dev_iter, model, device=args.gpu),
                   trigger=(args.validation_interval, 'epoch'))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))
    if args.early_stopping:
        # Take a best snapshot (lowest validation loss)
        record_trigger = training.triggers.MinValueTrigger(
            'validation/main/loss', (args.validation_interval, 'epoch'))
        trainer.extend(extensions.snapshot_object(model, 'best_model.npz'),
                       trigger=record_trigger)

    trainer.run()

    # With early stopping, restore the best snapshot; otherwise persist the
    # final weights under the same filename.
    if args.early_stopping:
        chainer.serializers.load_npz(
            os.path.join(args.out, 'best_model.npz'), model)
    else:
        chainer.serializers.save_npz(
            os.path.join(args.out, 'best_model.npz'), model)

    print('Running test...')
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        _, accuracy = model.evaluate(idx_test)
    print('Test accuracy = %f' % accuracy)
def main():
    ''' main function, start point

    Trains the AEFINet frame-interpolation network with a VGG16-based
    content loss on the General-100 dataset. Results (args dump, snapshots,
    plots, log) go under a per-configuration directory below ROOT_PATH.

    Fixes vs. the original:
    - `--iter_parallel` carried a copy-pasted, wrong help string
      ("filter(kernel) sizes").
    - `--resume` was parsed but never used; the trainer state is now
      actually restored when the flag is given (default '' keeps the old
      behavior).
    '''
    # command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.001,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=100,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    # BUG FIX: help text was a copy-paste leftover ('filter(kernel) sizes').
    parser.add_argument('--iter_parallel', '-p', action='store_true',
                        default=False,
                        help='load dataset with parallel iterators')
    parser.add_argument('--opt', '-o', type=str, choices=('adam', 'sgd'),
                        default='adam')
    args = parser.parse_args()

    # print the training configuration
    print("-=Learning Parameter=-")
    print("# Max Epochs: {}".format(args.epoch))
    print("# Batch Size: {}".format(args.batchsize))
    print("# Learning Rate: {}".format(args.learnrate))
    print("# Optimizer Method: {}".format(args.opt))
    print('# Train Dataet: General 100')
    if args.iter_parallel:
        print("# Data Iters that loads in Parallel")
    print("\n")

    # save directory (one per optimizer configuration)
    outdir = path.join(
        ROOT_PATH,
        'results/FI/AEFINet/AEFINetConcat_ch4_fsize5_VGG_content_loss_opt_{}'.
        format(args.opt))
    if not path.exists(outdir):
        os.makedirs(outdir)
    # Persist the run's arguments for later reference.
    with open(path.join(outdir, 'arg_param.txt'), 'w') as f:
        for k, v in args.__dict__.items():
            f.write('{}:{}\n'.format(k, v))

    print('# loading dataet(General100_train, General100_test) ...')
    # On-memory dataset for serial iteration; lazy one for parallel loading.
    if args.iter_parallel:
        train = SequenceDataset(dataset='train')
        test = SequenceDataset(dataset='test')
    else:
        train = SequenceDatasetOnMem(dataset='train')
        test = SequenceDatasetOnMem(dataset='test')

    # prepare model: frozen VGG16 provides the content-loss features.
    vgg16 = N.VGG16()
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        vgg16.to_gpu()
    chainer.serializers.load_npz(
        path.join(ROOT_PATH, 'models/VGG16.npz'), vgg16)
    model = N.VGG16Evaluator(N.AEFINetConcat(ch=4, f_size=5), vgg16)
    if args.gpu >= 0:
        model.to_gpu()

    # setup optimizer (argparse `choices` guarantees one branch runs)
    if args.opt == 'adam':
        optimizer = chainer.optimizers.Adam(alpha=args.learnrate)
    elif args.opt == 'sgd':
        optimizer = chainer.optimizers.MomentumSGD(lr=args.learnrate,
                                                   momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001))

    # setup iterators
    if args.iter_parallel:
        train_iter = chainer.iterators.MultiprocessIterator(
            train, args.batchsize, n_processes=8)
        test_iter = chainer.iterators.MultiprocessIterator(
            test, args.batchsize, repeat=False, shuffle=False, n_processes=8)
    else:
        train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
        test_iter = chainer.iterators.SerialIterator(
            test, args.batchsize, repeat=False, shuffle=False)

    # setup trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=outdir)
    # eval test data
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))
    # dump loss graph
    trainer.extend(extensions.dump_graph('main/loss'))
    # lr shift: step decay; the hyperparameter name differs between
    # MomentumSGD ('lr') and Adam ('alpha').
    if args.opt == 'sgd':
        trainer.extend(extensions.ExponentialShift("lr", 0.1),
                       trigger=(100, 'epoch'))
    if args.opt == 'adam':
        trainer.extend(extensions.ExponentialShift("alpha", 0.1),
                       trigger=(50, 'epoch'))
    # save snapshot every 10 epochs
    trainer.extend(extensions.snapshot(), trigger=(10, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_snapshot_{.updater.epoch}'), trigger=(10, 'epoch'))
    # log report
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.observe_lr(), trigger=(1, 'epoch'))
    # plot loss graph
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              'epoch', file_name='loss.png'))
    # plot acc graph
    trainer.extend(
        extensions.PlotReport(['main/PSNR', 'validation/main/PSNR'],
                              'epoch', file_name='PSNR.png'))
    # print info
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/loss_mse',
            'main/loss_cont', 'main/PSNR', 'validation/main/PSNR', 'lr',
            'elapsed_time'
        ]))
    # print progbar
    trainer.extend(extensions.ProgressBar())

    # BUG FIX: --resume was accepted but ignored; restore trainer state here.
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Train a keypoint-detection model (COCO or MPII) driven by an INI config.

    All hyperparameters come from ``--config_path``; a pretrained model is
    loaded from ``--resume`` before training continues. Uses a
    ParallelUpdater across the devices listed in the config.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--config_path', type=str, default='config.ini')
    # resume from an already-trained model (loaded unconditionally below,
    # since the default points at an existing snapshot path)
    parser.add_argument('--resume', default='/home/ferryliu/MYRCNN/chainer/trained/bestmodel.npz')
    # number of train/val samples to visualize before training starts
    parser.add_argument('--plot_samples', type=int, default=0)
    args = parser.parse_args()

    config = configparser.ConfigParser()
    config.read(args.config_path, 'UTF-8')
    chainer.global_config.autotune = True
    chainer.cuda.set_max_workspace_size(11388608)

    # create result dir and copy file
    logger.info('> store file to result dir %s', config.get('result', 'dir'))
    save_files(config.get('result', 'dir'))

    # gpu or cpu
    logger.info('> set up devices')
    devices = setup_devices(config.get('training_param', 'gpus'))
    set_random_seed(devices, config.getint('training_param', 'seed'))

    logger.info('> get dataset')
    dataset_type = config.get('dataset', 'type')
    if dataset_type == 'coco':
        # force to set `use_cache = False`
        train_set = get_coco_dataset(
            insize=parse_size(config.get('model_param', 'insize')),
            image_root=config.get(dataset_type, 'train_images'),
            annotations=config.get(dataset_type, 'train_annotations'),
            min_num_keypoints=config.getint(dataset_type, 'min_num_keypoints'),
            use_cache=False,
            do_augmentation=True,
        )
        test_set = get_coco_dataset(
            insize=parse_size(config.get('model_param', 'insize')),
            image_root=config.get(dataset_type, 'val_images'),
            annotations=config.get(dataset_type, 'val_annotations'),
            min_num_keypoints=config.getint(dataset_type, 'min_num_keypoints'),
            use_cache=False,
        )
    elif dataset_type == 'mpii':
        train_set, test_set = get_mpii_dataset(
            insize=parse_size(config.get('model_param', 'insize')),
            image_root=config.get(dataset_type, 'images'),
            annotations=config.get(dataset_type, 'annotations'),
            train_size=config.getfloat(dataset_type, 'train_size'),
            min_num_keypoints=config.getint(dataset_type, 'min_num_keypoints'),
            use_cache=config.getboolean(dataset_type, 'use_cache'),
            seed=config.getint('training_param', 'seed'),
        )
    else:
        raise Exception('Unknown dataset {}'.format(dataset_type))
    logger.info('dataset type: %s', dataset_type)
    logger.info('training images: %d', len(train_set))
    logger.info('validation images: %d', len(test_set))

    # optionally render a few ground-truth samples before training
    # NOTE(review): assumes dataset items are dicts with keys 'image',
    # 'keypoints', 'bbox', 'is_labeled', 'edges' — confirm in the dataset code
    if args.plot_samples > 0:
        for i in range(args.plot_samples):
            data = train_set[i]
            visualize.plot('train-{}.png'.format(i), data['image'],
                           data['keypoints'], data['bbox'],
                           data['is_labeled'], data['edges'])
            data = test_set[i]
            visualize.plot('val-{}.png'.format(i), data['image'],
                           data['keypoints'], data['bbox'],
                           data['is_labeled'], data['edges'])

    # build the model
    logger.info('> load model')
    model = create_model(config, train_set)
    # # load an already-trained model
    # trained_model = './trained/best_snapshot'
    # chainer.serializers.load_npz(trained_model, model, strict=False)
    serializers.load_npz(args.resume, model)

    logger.info('> transform dataset')
    # model.encode converts raw samples into training targets
    train_set = TransformDataset(train_set, model.encode)
    test_set = TransformDataset(test_set, model.encode)

    logger.info('> create iterators')
    train_iter = chainer.iterators.MultiprocessIterator(
        train_set, config.getint('training_param', 'batchsize'),
        n_processes=config.getint('training_param', 'num_process')
    )
    test_iter = chainer.iterators.SerialIterator(
        test_set, config.getint('training_param', 'batchsize'),
        repeat=False, shuffle=False
    )

    logger.info('> setup optimizer')
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.0005))

    logger.info('> setup trainer')
    # data-parallel training across every device in `devices`
    updater = training.updaters.ParallelUpdater(train_iter, optimizer,
                                                devices=devices)
    trainer = training.Trainer(
        updater,
        (config.getint('training_param', 'train_iter'), 'iteration'),
        config.get('result', 'dir')
    )

    logger.info('> setup extensions')
    # learning rate decays linearly from the configured value to 0
    trainer.extend(
        extensions.LinearShift(
            'lr',
            value_range=(config.getfloat('training_param', 'learning_rate'), 0),
            time_range=(0, config.getint('training_param', 'train_iter'))
        ),
        trigger=(1, 'iteration')
    )
    trainer.extend(
        extensions.Evaluator(test_iter, model, device=devices['main']))
    if extensions.PlotReport.available():
        trainer.extend(extensions.PlotReport([
            'main/loss', 'validation/main/loss',
        ], 'epoch', file_name='loss.png'))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.observe_lr())
    trainer.extend(extensions.PrintReport([
        'epoch', 'elapsed_time', 'lr',
        'main/loss', 'validation/main/loss',
        'main/loss_resp', 'validation/main/loss_resp',
        'main/loss_iou', 'validation/main/loss_iou',
        'main/loss_coor', 'validation/main/loss_coor',
        'main/loss_size', 'validation/main/loss_size',
        'main/loss_limb', 'validation/main/loss_limb',
    ]))
    trainer.extend(extensions.ProgressBar())
    # keep a snapshot of trainer and model at each validation-loss minimum
    trainer.extend(
        extensions.snapshot(filename='best_snapshot'),
        trigger=training.triggers.MinValueTrigger('validation/main/loss'))
    trainer.extend(
        extensions.snapshot_object(model, filename='bestmodel.npz'),
        trigger=training.triggers.MinValueTrigger('validation/main/loss'))

    # announce which pretrained model was loaded (message text is
    # user-facing output and is kept as-is; it reads "loading the
    # previously trained model:")
    if args.resume:
        print('加载之前训练的模型: --------> ', args.resume)

    logger.info('> start training')
    trainer.run()
CosineAnnealing('lr', int(args.epoch), len(train) / args.batchsize, eta_min=args.eta_min, init=args.lr)) else: trainer.extend(extensions.ExponentialShift('lr', 0.1, init=args.lr), trigger=triggers.ManualScheduleTrigger( [int(args.epoch * 0.50), int(args.epoch * 0.75)], 'epoch')) test_interval = 1, 'epoch' snapshot_interval = 10, 'epoch' log_interval = 100, 'iteration' trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpus[0]), trigger=test_interval) trainer.extend(extensions.dump_graph('main/loss')) trainer.extend( extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'), trigger=snapshot_interval) trainer.extend(extensions.snapshot_object(model, 'model_epoch_{.updater.epoch}'), trigger=snapshot_interval) trainer.extend(extensions.LogReport(trigger=log_interval)) trainer.extend(extensions.observe_lr(), trigger=log_interval) log_list = [ 'epoch', 'iteration', 'main/loss', 'main/accuracy', 'validation/main/loss', 'validation/main/accuracy', 'lr', 'elapsed_time'
max_epoch = 100 mean_loss = 0 delta = 1e-7 model = L.Classifier(model) optimizer.setup(model) updater = training.StandardUpdater(train_iter, optimizer, device=gpu_id) trainer = training.Trainer(updater, (max_epoch, 'epoch'), out='mnist_result') trainer.extend(extensions.LogReport()) trainer.extend( extensions.snapshot(filename='snapshot_epoch-{.updater.epoch}')) trainer.extend(extensions.Evaluator(valid_iter, model, device=gpu_id), name='val') trainer.extend( extensions.PrintReport([ 'epoch', 'main/loss', 'main/accuracy', 'val/main/loss', 'val/main/accuracy', 'l1/W/data/std', 'elapsed_time' ])) trainer.extend( extensions.ParameterStatistics(model.predictor.fc6, {'std': np.std})) trainer.extend( extensions.PlotReport(['l1/W/data/std'], x_key='epoch', file_name='std.png')) trainer.extend( extensions.PlotReport(['main/loss', 'val/main/loss'], x_key='epoch',
def main():
    """Command-line entry point: train an MLP classifier on MNIST.

    The MNIST training set is extended with extra samples fetched from
    Redis (via the project helpers ``getRedisDataset``/``catDatasets``),
    then a standard Chainer trainer loop runs with per-epoch evaluation,
    logging, plotting and progress reporting.  The bare MLP (without the
    Classifier wrapper) is saved to ``<out>/pretrained_model`` at the end.
    """
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # The Classifier wrapper reports softmax cross entropy loss and
    # accuracy at every iteration; PrintReport below picks those up.
    mlp = MLP(args.unit, 10)
    classifier = L.Classifier(mlp)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()  # make this GPU current
        classifier.to_gpu()  # copy the model onto it

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(classifier)

    # MNIST plus extra samples pulled from Redis, concatenated into one
    # training dataset.
    train, test = chainer.datasets.get_mnist()
    x, y = getRedisDataset()
    train = catDatasets(train, x, y)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate against the test set once per epoch.
    trainer.extend(extensions.Evaluator(test_iter, classifier, device=args.gpu))

    # Dump the computational graph rooted at 'main/loss' at the first
    # iteration ("main" refers to the target link of the "main" optimizer).
    trainer.extend(extensions.dump_graph('main/loss'))

    # Snapshot frequency in epochs; the snapshot extension itself stays
    # disabled, as in the original example.
    frequency = args.epoch if args.frequency == -1 else max(1, args.frequency)
    # trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))

    trainer.extend(extensions.LogReport())

    # Save the two training-curve plots into the result directory when
    # matplotlib is importable.
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch', file_name='accuracy.png'))

    # "main" entries come from the updater's target link, "validation"
    # entries from the Evaluator extension.
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Restore the full trainer state from a previous snapshot.
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()

    # Persist only the bare MLP so it can be reused as a pretrained model.
    chainer.serializers.save_npz(args.out + '/pretrained_model', mlp)
def check_mnist(use_gpu, use_chainerx, display_log=True):
    """Train a small MLP on MNIST across a ChainerMN communicator and
    verify that training converges and that the multi-node checkpointer
    cleans up its snapshot directory.

    Args:
        use_gpu (bool): Run on the GPU matching the communicator's
            intra-rank instead of the CPU.
        use_chainerx (bool): Back the model with a ChainerX device.
        display_log (bool): Emit per-epoch log/print reports on rank 0.

    Raises:
        AssertionError: If validation accuracy does not exceed 0.95 or
            the checkpointer left files behind.
    """
    epoch = 5
    batchsize = 100
    n_units = 100

    warnings.filterwarnings(action='always', category=DeprecationWarning)

    model = L.Classifier(MLP(n_units, 10))
    comm = chainermn.create_communicator('naive')
    if use_gpu:
        # One GPU per process, selected by the communicator's intra-rank.
        device = testing.get_device(comm.intra_rank, use_chainerx)
        device.use()
        model.to_device(device)
    else:
        device = testing.get_device(use_chainerx=use_chainerx)

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), comm)
    optimizer.setup(model)

    # Only rank 0 loads the dataset; scatter_dataset then distributes a
    # shard to every worker.
    if comm.rank == 0:
        train, test = chainer.datasets.get_mnist()
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm, shuffle=True)

    train_iter = chainer.iterators.SerialIterator(train, batchsize)
    test_iter = chainer.iterators.SerialIterator(test, batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    updater = training.StandardUpdater(
        train_iter,
        optimizer,
        device=device
    )

    trainer = training.Trainer(updater, (epoch, 'epoch'))

    # Wrap the standard Chainer evaluator by MultiNodeEvaluator so every
    # node contributes to the validation metrics.
    evaluator = extensions.Evaluator(test_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator)

    # Add a checkpointer just to check that checkpointing runs without
    # errors.  Fix: the original passed dir='/tmp', which is POSIX-only;
    # mkdtemp's default already resolves TMPDIR / the platform temp dir.
    path = tempfile.mkdtemp(prefix=__name__ + '-tmp-')
    checkpointer = create_multi_node_checkpointer(name=__name__, comm=comm,
                                                  path=path)
    trainer.extend(checkpointer, trigger=(1, 'epoch'))

    # Display and output extensions only on one worker; otherwise the
    # same output would just be repeated once per process.
    if comm.rank == 0 and display_log:
        trainer.extend(extensions.LogReport(trigger=(1, 'epoch')),
                       trigger=(1, 'epoch'))
        trainer.extend(extensions.PrintReport(['epoch',
                                               'main/loss',
                                               'validation/main/loss',
                                               'main/accuracy',
                                               'validation/main/accuracy',
                                               'elapsed_time'],
                                              out=sys.stderr),
                       trigger=(1, 'epoch'))
    trainer.run()

    # Fix: this value is an *accuracy*, not an error rate — the original
    # name `err` made the `> 0.95` threshold read backwards.
    accuracy = evaluator()['validation/main/accuracy']
    assert accuracy > 0.95

    # Check that the checkpointer successfully finalized (emptied) its
    # snapshot directory, then remove it.
    assert [] == os.listdir(path)
    os.removedirs(path)