insize = 224
train_list = load_image_list(args.train)
val_list = load_image_list(args.val)
mean_image = np.load(args.mean)

encoders = []
decoders = []

# Train layer 1
if args.conv1_1 is None:
    converter1_1 = converter_generator(encoders)
    loader1_1 = loader_generator(insize, train_list, mean_image,
                                 converter1_1, use_gpu=use_gpu)
    optimizer1_1 = optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    trainer1_1 = Trainer(conv1_1, optimizer1_1, loader1_1,
                         max_epochs=1, logger=logger)
    trainer1_1.loop(len(train_list), args.batchsize)
    with open('pkl/conv1_1.pkl', 'wb') as f1_1:
        pickle.dump(conv1_1, f1_1)
else:
    with open(args.conv1_1, 'rb') as f1_1:
        conv1_1 = pickle.load(f1_1)
def create(self):
    return optimizers.MomentumSGD(0.1)
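A minimal sketch of how such an optimizer factory is typically consumed in Chainer; the MLP chain below is a hypothetical stand-in, not taken from the snippet above:

import chainer
import chainer.functions as F
import chainer.links as L
from chainer import optimizers

class MLP(chainer.Chain):
    # Hypothetical two-layer network, used only to demonstrate setup().
    def __init__(self):
        super(MLP, self).__init__()
        with self.init_scope():
            self.l1 = L.Linear(None, 100)
            self.l2 = L.Linear(100, 10)

    def __call__(self, x):
        return self.l2(F.relu(self.l1(x)))

model = L.Classifier(MLP())
optimizer = optimizers.MomentumSGD(0.1)  # same lr as create() above
optimizer.setup(model)  # bind the optimizer to the model's parameters before any update()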
elif args.feature == "mel":
    model = models.Mel(pre=is_pre)
elif args.feature == "mfcc":
    model = models.Mfcc(pre=is_pre)

if not is_pre:
    print('Load model from', save_name + ".model")
    serializers.load_hdf5(save_name + ".model", model)

if args.gpu >= 0:
    cuda.get_device(args.gpu).use()
    model.to_gpu()
xp = np if args.gpu < 0 else cuda.cupy

optimizer = optimizers.Adam() if args.learning == "adam" \
    else optimizers.MomentumSGD(lr=0.01, momentum=0.9)
optimizer.setup(model)
print("algorithm is", args.learning)

for epoch in tqdm(range(1, args.epoch + 1)):
    # print('epoch', epoch, "/", args.epoch)
    # if args.learning == "sgd":
    #     print("learning rate:", optimizer.lr)
    x_batch = np.ndarray((batchsize, model.insize), dtype=np.float32)
    y_batch = np.ndarray((batchsize,), dtype=np.int32)
    random.shuffle(train_list)
    sum_accuracy = 0
    sum_loss = 0
    count = 0
    batch_range = range(N)
    return h


train_data, test_data = get_mnist(n_train=1000, n_test=100,
                                  with_label=False, classes=[0])
num_train_it = 2000
batchsize = 10

dis = discriminator()
gen = generator()
optimizer4gen = optimizers.MomentumSGD()
optimizer4gen.setup(gen)
optimizer4dis = optimizers.MomentumSGD()
optimizer4dis.setup(dis)

losscoll_dis = []
losscoll_gen = []
for trainit in range(num_train_it):
    z = chainer.Variable(
        np.random.uniform(-1, 1, (batchsize, 100)).astype('float32'))
    gendata = gen(z)
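The fragment above stops right after sampling generator output. A hedged sketch of the adversarial update that commonly follows this pattern; the sigmoid_cross_entropy loss formulation and the next_real_batch helper are assumptions, not taken from the source:

# Hedged sketch: a standard GAN update step continuing the loop above.
# next_real_batch() is a hypothetical helper that returns a float32 batch
# of real training images; the label convention (real=1, fake=0) is an
# assumption about this particular code.
import numpy as np
import chainer
import chainer.functions as F

real = chainer.Variable(next_real_batch())  # hypothetical helper
ones = np.ones((batchsize, 1), dtype=np.int32)
zeros = np.zeros((batchsize, 1), dtype=np.int32)

# Discriminator update: real -> 1, generated -> 0.
# gendata.data detaches the generator graph so its weights are untouched here.
loss_dis = F.sigmoid_cross_entropy(dis(real), ones) + \
           F.sigmoid_cross_entropy(dis(gendata.data), zeros)
dis.cleargrads()
loss_dis.backward()
optimizer4dis.update()

# Generator update: try to make the discriminator output 1 on fakes.
loss_gen = F.sigmoid_cross_entropy(dis(gendata), ones)
gen.cleargrads()
loss_gen.backward()
optimizer4gen.update()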
def main():
    parser = argparse.ArgumentParser(description='training mnist')
    parser.add_argument('--gpu', '-g', default=-1, type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--seed', '-s', type=int, default=0,
                        help='Random seed')
    parser.add_argument('--n_fold', '-nf', type=int, default=5,
                        help='n_fold cross validation')
    parser.add_argument('--fold', '-f', type=int, default=1)
    parser.add_argument('--out_dir_name', '-dn', type=str, default=None,
                        help='Name of the output directory')
    parser.add_argument('--report_trigger', '-rt', type=str, default='1e',
                        help='Interval for reporting (Ex.100i, default:1e)')
    parser.add_argument('--save_trigger', '-st', type=str, default='1e',
                        help='Interval for saving the model'
                             '(Ex.100i, default:1e)')
    parser.add_argument('--load_model', '-lm', type=str, default=None,
                        help='Path of the model object to load')
    parser.add_argument('--load_optimizer', '-lo', type=str, default=None,
                        help='Path of the optimizer object to load')
    args = parser.parse_args()

    if args.out_dir_name is None:
        start_time = datetime.now()
        out_dir = Path('output/{}'.format(start_time.strftime('%Y%m%d_%H%M')))
    else:
        out_dir = Path('output/{}'.format(args.out_dir_name))

    random.seed(args.seed)
    np.random.seed(args.seed)
    cupy.random.seed(args.seed)
    chainer.config.cudnn_deterministic = True

    # model = ModifiedClassifier(SEResNeXt50())
    # model = ModifiedClassifier(SERes2Net50())
    model = ModifiedClassifier(SEResNeXt101())
    if args.load_model is not None:
        serializers.load_npz(args.load_model, model)

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    optimizer = optimizers.MomentumSGD(lr=0.1, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(1e-4))
    if args.load_optimizer is not None:
        serializers.load_npz(args.load_optimizer, optimizer)

    n_fold = args.n_fold
    slices = [slice(i, None, n_fold) for i in range(n_fold)]
    fold = args.fold - 1

    # model1
    # augmentation = [
    #     ('Rotate', {'p': 0.8, 'limit': 5}),
    #     ('PadIfNeeded', {'p': 0.5, 'min_height': 28, 'min_width': 30}),
    #     ('PadIfNeeded', {'p': 0.5, 'min_height': 30, 'min_width': 28}),
    #     ('Resize', {'p': 1.0, 'height': 28, 'width': 28}),
    #     ('RandomScale', {'p': 1.0, 'scale_limit': 0.1}),
    #     ('PadIfNeeded', {'p': 1.0, 'min_height': 32, 'min_width': 32}),
    #     ('RandomCrop', {'p': 1.0, 'height': 28, 'width': 28}),
    #     ('Mixup', {'p': 0.5}),
    #     ('Cutout', {'p': 0.5, 'num_holes': 4, 'max_h_size': 4,
    #                 'max_w_size': 4}),
    # ]
    # resize = None

    # model2
    augmentation = [
        ('Rotate', {'p': 0.8, 'limit': 5}),
        ('PadIfNeeded', {'p': 0.5, 'min_height': 28, 'min_width': 32}),
        ('PadIfNeeded', {'p': 0.5, 'min_height': 32, 'min_width': 28}),
        ('Resize', {'p': 1.0, 'height': 32, 'width': 32}),
        ('RandomScale', {'p': 1.0, 'scale_limit': 0.1}),
        ('PadIfNeeded', {'p': 1.0, 'min_height': 36, 'min_width': 36}),
        ('RandomCrop', {'p': 1.0, 'height': 32, 'width': 32}),
        ('Mixup', {'p': 0.5}),
        ('Cutout', {'p': 0.5, 'num_holes': 4, 'max_h_size': 4,
                    'max_w_size': 4}),
    ]
    resize = [('Resize', {'p': 1.0, 'height': 32, 'width': 32})]

    train_data = KMNIST(augmentation=augmentation, drop_index=slices[fold],
                        pseudo_labeling=True)
    valid_data = KMNIST(augmentation=resize, index=slices[fold])

    train_iter = iterators.SerialIterator(train_data, args.batchsize)
    valid_iter = iterators.SerialIterator(valid_data, args.batchsize,
                                          repeat=False, shuffle=False)

    updater = StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = Trainer(updater, (args.epoch, 'epoch'), out=out_dir)

    report_trigger = (int(args.report_trigger[:-1]),
                      'iteration' if args.report_trigger[-1] == 'i' else 'epoch')
    trainer.extend(extensions.LogReport(trigger=report_trigger))
    trainer.extend(extensions.Evaluator(valid_iter, model, device=args.gpu),
                   name='val', trigger=report_trigger)
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'main/loss', 'main/accuracy',
         'val/main/loss', 'val/main/accuracy', 'elapsed_time']),
        trigger=report_trigger)
    trainer.extend(extensions.PlotReport(
        ['main/loss', 'val/main/loss'], x_key=report_trigger[1],
        marker='.', file_name='loss.png', trigger=report_trigger))
    trainer.extend(extensions.PlotReport(
        ['main/accuracy', 'val/main/accuracy'], x_key=report_trigger[1],
        marker='.', file_name='accuracy.png', trigger=report_trigger))

    save_trigger = (int(args.save_trigger[:-1]),
                    'iteration' if args.save_trigger[-1] == 'i' else 'epoch')
    trainer.extend(extensions.snapshot_object(
        model,
        filename='model_{0}-{{.updater.{0}}}.npz'.format(save_trigger[1])),
        trigger=save_trigger)
    trainer.extend(extensions.snapshot_object(
        optimizer,
        filename='optimizer_{0}-{{.updater.{0}}}.npz'.format(save_trigger[1])),
        trigger=save_trigger)
    trainer.extend(extensions.ProgressBar())
    trainer.extend(CosineAnnealing(lr_max=0.1, lr_min=1e-6, T_0=20),
                   trigger=(1, 'epoch'))

    best_model_trigger = triggers.MaxValueTrigger('val/main/accuracy',
                                                  trigger=(1, 'epoch'))
    trainer.extend(extensions.snapshot_object(model, filename='best_model.npz'),
                   trigger=best_model_trigger)
    trainer.extend(extensions.snapshot_object(optimizer,
                                              filename='best_optimizer.npz'),
                   trigger=best_model_trigger)
    best_loss_model_trigger = triggers.MinValueTrigger('val/main/loss',
                                                       trigger=(1, 'epoch'))
    trainer.extend(extensions.snapshot_object(model,
                                              filename='best_loss_model.npz'),
                   trigger=best_loss_model_trigger)
    trainer.extend(extensions.snapshot_object(optimizer,
                                              filename='best_loss_optimizer.npz'),
                   trigger=best_loss_model_trigger)

    if out_dir.exists():
        shutil.rmtree(out_dir)
    out_dir.mkdir()

    # Write parameters text
    with open(out_dir / 'train_params.txt', 'w') as f:
        f.write('model: {}\n'.format(model.predictor.__class__.__name__))
        f.write('n_epoch: {}\n'.format(args.epoch))
        f.write('batch_size: {}\n'.format(args.batchsize))
        f.write('n_data_train: {}\n'.format(len(train_data)))
        f.write('n_data_val: {}\n'.format(len(valid_data)))
        f.write('seed: {}\n'.format(args.seed))
        f.write('n_fold: {}\n'.format(args.n_fold))
        f.write('fold: {}\n'.format(args.fold))
        f.write('augmentation: \n')
        for process, param in augmentation:
            f.write('  {}: {}\n'.format(process, param))

    trainer.run()
def train():
    args = configuration()

    # Parameter information
    print('===================================================')
    if args.test:
        print('Num of Minibatch Size: 1')  # test mode always runs one image
    else:
        print('Num of Minibatch Size: {}'.format(args.batch))
    print('Num of Epoch         : {}'.format(args.epoch))
    if args.gpu >= 0:
        print('GPU Number           : {}'.format(args.gpu))
    else:
        print('Training with CPU only.')
    print('===================================================')

    # Set up the training network
    model = Brief_CNN()
    if args.model is not None:
        print('Loading Brief CNN model from {}'.format(args.model))
        serializers.load_npz(args.model, model)
    if args.gpu >= 0:
        cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Set up the optimizer (MomentumSGD)
    optimizer = optimizers.MomentumSGD(lr=0.0001)
    optimizer.setup(model)
    if args.opt is not None:
        print('Loading Brief CNN Optimizer from {}'.format(args.opt))
        serializers.load_npz(args.opt, optimizer)

    if args.test:
        test = load_single_image(args.img)
        xp = model.xp
        test = xp.asarray(test, dtype=xp.float32)
        with chainer.using_config('train', False):
            val = model(test)
        prob = F.softmax(val).data
        prob = xp.reshape(prob, (prob.shape[1]))
        max_val_ind = xp.ndarray.argmax(prob)
        max_val = prob[max_val_ind] * 100
        print('>>> {} : {} %'.format(max_val_ind, max_val))
    else:
        train = make_dataset(args.img)
        train_iter = iterators.SerialIterator(train, batch_size=args.batch)
        log_filename = 'log_train'
        updater = CNNUpdater(net_model=model,
                             iterator={'main': train_iter},
                             optimizer={'optimizer': optimizer},
                             device=args.gpu)
        trainer = training.Trainer(updater, (args.epoch, 'epoch'), out='results')
        trainer.extend(extensions.LogReport(trigger=(1, 'epoch'),
                                            log_name=log_filename))
        trainer.extend(extensions.PrintReport(['epoch', 'Loss', 'Acc']))
        trainer.extend(extensions.snapshot_object(model, 'model'),
                       trigger=(10, 'epoch'))
        trainer.extend(extensions.snapshot_object(optimizer, 'optimizer'),
                       trigger=(10, 'epoch'))
        trainer.extend(extensions.ProgressBar(update_interval=1))

        trainer.run()

        modelname = 'results/model'
        print('Saving Brief CNN model to {}'.format(modelname))
        serializers.save_npz(modelname, model)
        optname = 'results/optimizer'
        print('Saving Brief CNN optimizer to {}'.format(optname))
        serializers.save_npz(optname, optimizer)
    print('OVER')
def run(data_file, is_train=False, **args):
    is_test = not is_train
    batchsize = args['batchsize']
    model_name = args['model_name']
    optimizer_name = args['optimizer']
    save_dir = args['save_dir']
    print(args)
    if save_dir[-1] != '/':
        save_dir = save_dir + '/'

    # check that save_dir exists
    if not os.path.isdir(save_dir):
        err_msg = 'There is no dir : {}\n'.format(save_dir)
        err_msg += '##############################\n'
        err_msg += '## Please run the following: \n'
        err_msg += '## $ mkdir {}\n'.format(save_dir)
        err_msg += '##############################\n'
        raise ValueError(err_msg)

    save_name = args['save_name']
    if save_name == '':
        save_name = '_'.join([model_name, optimizer_name])
    save_name = save_dir + save_name

    xp = cuda.cupy if args['gpu'] >= 0 else np
    if args['gpu'] >= 0:
        cuda.get_device(args['gpu']).use()
    xp.random.seed(1234)

    # load files
    dev_file = args['dev_file']
    test_file = args['test_file']
    delimiter = args['delimiter']
    sentences_train = []
    if is_train:
        sentences_train = util.read_conll_file(filename=data_file,
                                               delimiter=delimiter,
                                               input_idx=0, output_idx=-1)
        if len(sentences_train) == 0:
            s = str(len(sentences_train))
            err_msg = 'Invalid training sizes: {} sentences. '.format(s)
            raise ValueError(err_msg)
    else:
        # predict
        sentences_train = util.read_raw_file(filename=data_file, delimiter=u' ')

    # sentences_train = sentences_train[:100]
    sentences_dev = []
    sentences_test = []
    if dev_file:
        sentences_dev = util.read_conll_file(dev_file, delimiter=delimiter,
                                             input_idx=0, output_idx=-1)
    if test_file:
        sentences_test = util.read_conll_file(test_file, delimiter=delimiter,
                                              input_idx=0, output_idx=-1)

    save_vocab = save_name + '.vocab'
    save_vocab_char = save_name + '.vocab_char'
    save_tags_vocab = save_name + '.vocab_tag'
    save_train_config = save_name + '.train_config'

    # TODO: check unknown POS tags
    # TODO: compute unk words
    if is_train:
        sentences_words_train = [w_obj[0] for w_obj in sentences_train]
        vocab = util.build_vocab(sentences_words_train)
        vocab_char = util.build_vocab(util.flatten(sentences_words_train))
        vocab_tags = util.build_tag_vocab(sentences_train)
    elif is_test:
        vocab = util.load_vocab(save_vocab)
        vocab_char = util.load_vocab(save_vocab_char)
        vocab_tags = util.load_vocab(save_tags_vocab)

    PAD_IDX = vocab[PADDING]
    UNK_IDX = vocab[UNKWORD]
    CHAR_PAD_IDX = vocab_char[PADDING]
    CHAR_UNK_IDX = vocab_char[UNKWORD]

    def parse_to_word_ids(sentences):
        return util.parse_to_word_ids(sentences, xp=xp, vocab=vocab,
                                      UNK_IDX=UNK_IDX, idx=0)

    def parse_to_char_ids(sentences):
        return util.parse_to_char_ids(sentences, xp=xp, vocab_char=vocab_char,
                                      UNK_IDX=CHAR_UNK_IDX, idx=0)

    def parse_to_tag_ids(sentences):
        return util.parse_to_tag_ids(sentences, xp=xp, vocab=vocab_tags,
                                     UNK_IDX=-1, idx=-1)

    # if is_train:
    x_train = parse_to_word_ids(sentences_train)
    x_char_train = parse_to_char_ids(sentences_train)
    y_train = parse_to_tag_ids(sentences_train)
    # elif is_test:
    #     x_predict = parse_to_word_ids(sentences_predict)
    #     x_char_predict = parse_to_char_ids(sentences_predict)
    #     y_predict = parse_to_tag_ids(sentences_predict)

    x_dev = parse_to_word_ids(sentences_dev)
    x_char_dev = parse_to_char_ids(sentences_dev)
    y_dev = parse_to_tag_ids(sentences_dev)

    x_test = parse_to_word_ids(sentences_test)
    x_char_test = parse_to_char_ids(sentences_test)
    y_test = parse_to_tag_ids(sentences_test)

    cnt_train_unk = sum([xp.sum(d == UNK_IDX) for d in x_train])
    cnt_train_word = sum([d.size for d in x_train])
    unk_train_unk_rate = float(cnt_train_unk) / cnt_train_word

    cnt_dev_unk = sum([xp.sum(d == UNK_IDX) for d in x_dev])
    cnt_dev_word = sum([d.size for d in x_dev])
    unk_dev_unk_rate = float(cnt_dev_unk) / max(cnt_dev_word, 1)

    logging.info('train     :' + str(len(x_train)))
    logging.info('dev       :' + str(len(x_dev)))
    logging.info('test      :' + str(len(x_test)))
    logging.info('vocab     :' + str(len(vocab)))
    logging.info('vocab_tags:' + str(len(vocab_tags)))
    logging.info('unk count (train):' + str(cnt_train_unk))
    logging.info('unk rate  (train):' + str(unk_train_unk_rate))
    logging.info('cnt all words (train):' + str(cnt_train_word))
    logging.info('unk count (dev):' + str(cnt_dev_unk))
    logging.info('unk rate  (dev):' + str(unk_dev_unk_rate))
    logging.info('cnt all words (dev):' + str(cnt_dev_word))

    # show model config
    logging.info('######################')
    logging.info('## Model Config')
    logging.info('model_name:' + str(model_name))
    logging.info('batchsize :' + str(batchsize))
    logging.info('optimizer :' + str(optimizer_name))

    # save model config
    logging.info('######################')
    logging.info('## Model Save Config')
    logging.info('save_dir          :' + str(save_dir))
    logging.info('save_vocab        :' + save_vocab)
    logging.info('save_vocab_char   :' + save_vocab_char)
    logging.info('save_tags_vocab   :' + save_tags_vocab)
    logging.info('save_train_config :' + save_train_config)
    util.write_vocab(save_vocab, vocab)
    util.write_vocab(save_vocab_char, vocab_char)
    util.write_vocab(save_tags_vocab, vocab_tags)
    util.write_vocab(save_train_config, args)

    net = BiLSTM_CNN_CRF(n_vocab=len(vocab),
                         n_char_vocab=len(vocab_char),
                         emb_dim=args['n_word_emb'],
                         hidden_dim=args['n_hidden'],
                         n_layers=args['n_layer'],
                         init_emb=None,
                         n_label=len(vocab_tags))

    if args['word_emb_file']:
        # set pre-trained embeddings, e.g. ./emb/glove.6B.100d.txt
        emb_file = args['word_emb_file']
        word_ids, word_vecs = util.load_glove_embedding(emb_file, vocab)
        net.word_embed.W.data[word_ids] = word_vecs

    if args['gpu'] >= 0:
        net.to_gpu()

    init_alpha = args['init_lr']
    if optimizer_name == 'adam':
        opt = optimizers.Adam(alpha=init_alpha, beta1=0.9, beta2=0.9)
    elif optimizer_name == 'adadelta':
        opt = optimizers.AdaDelta()
    elif optimizer_name == 'sgd_mom':
        opt = optimizers.MomentumSGD(lr=init_alpha, momentum=0.9)
    elif optimizer_name == 'sgd':
        opt = optimizers.SGD(lr=init_alpha)
    opt.setup(net)
    opt.add_hook(chainer.optimizer.GradientClipping(5.0))

    def eval_loop(x_data, x_char_data, y_data):
        # dev or test
        net.set_train(train=False)
        iteration_list = range(0, len(x_data), batchsize)
        perm = np.random.permutation(len(x_data))
        sum_loss = 0.0
        predict_lists = []
        for i_index, index in enumerate(iteration_list):
            data = [(x_data[i], x_char_data[i], y_data[i])
                    for i in perm[index:index + batchsize]]
            x, x_char, target_y = zip(*data)
            output = net(x_data=x, x_char_data=x_char)
            predict, loss = net.predict(output, target_y)
            sum_loss += loss.data
            predict_lists.extend(predict)
        return predict_lists, sum_loss

    if is_test:
        # predict
        model_filename = args['model_filename']
        model_filename = save_dir + model_filename
        serializers.load_hdf5(model_filename, net)
        vocab_tags_inv = dict([(v, k) for k, v in vocab_tags.items()])
        x_predict = x_train
        x_char_predict = x_char_train
        y_predict = y_train
        predict_pairs, _ = eval_loop(x_predict, x_char_predict, y_predict)
        _, predict_tags = zip(*predict_pairs)
        for predict in predict_tags:
            predict = [vocab_tags_inv[tag_idx] for tag_idx in to_cpu(predict)]
            print(predict)
        return False

    tmax = args['max_iter']
    t = 0.0
    for epoch in range(args['max_iter']):
        # train
        net.set_train(train=True)
        iteration_list = range(0, len(x_train), batchsize)
        perm = np.random.permutation(len(x_train))
        sum_loss = 0.0
        predict_train = []
        for i_index, index in enumerate(iteration_list):
            data = [(x_train[i], x_char_train[i], y_train[i])
                    for i in perm[index:index + batchsize]]
            x, x_char, target_y = zip(*data)
            output = net(x_data=x, x_char_data=x_char)
            predict, loss = net.predict(output, target_y)

            # loss
            sum_loss += loss.data

            # update
            net.zerograds()
            loss.backward()
            opt.update()

            predict_train.extend(predict)

        # evaluation
        train_accuracy = util.eval_accuracy(predict_train)
        logging.info('epoch:' + str(epoch))
        logging.info(' [train]')
        logging.info('  loss     :' + str(sum_loss))
        logging.info('  accuracy :' + str(train_accuracy))

        # dev
        predict_dev, loss_dev = eval_loop(x_dev, x_char_dev, y_dev)
        dev_accuracy = util.eval_accuracy(predict_dev)
        logging.info(' [dev]')
        logging.info('  loss     :' + str(loss_dev))
        logging.info('  accuracy :' + str(dev_accuracy))

        # save model
        model_filename = save_name + '_epoch' + str(epoch)
        serializers.save_hdf5(model_filename + '.model', net)
        serializers.save_hdf5(model_filename + '.state', opt)
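The loop above keeps the MomentumSGD learning rate fixed across epochs. A hedged sketch of per-epoch exponential decay that is often appended at the end of such epoch loops; the decay factor 0.9 and the floor of 1e-5 are assumptions, not taken from this code:

# Hedged sketch: per-epoch learning-rate decay for the SGD-family optimizers.
# Chainer exposes lr as a settable hyperparameter on MomentumSGD / SGD.
if optimizer_name in ('sgd', 'sgd_mom'):
    opt.lr = max(opt.lr * 0.9, 1e-5)  # decay factor and floor are assumptions
    logging.info('  lr       :' + str(opt.lr))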
def pretraining():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--batchsize', type=int, default=256)
    args = parser.parse_args()

    xp = np
    gpu_id = args.gpu
    seed = args.seed

    train, _ = mnist.get_mnist()
    train, _ = convert.concat_examples(train, device=gpu_id)
    batchsize = args.batchsize

    model = StackedDenoisingAutoEncoder(input_dim=train.shape[1])
    if chainer.cuda.available and args.gpu >= 0:
        import cupy as cp
        xp = cp
        model.to_gpu(gpu_id)
    xp.random.seed(seed)

    # Layer-wise pretraining
    print("Layer-Wise Pretrain")
    for i, dae in enumerate(model.children()):
        print("Layer {}".format(i + 1))
        train_tuple = tuple_dataset.TupleDataset(train, train)
        train_iter = iterators.SerialIterator(train_tuple, batchsize)
        clf = L.Classifier(dae, lossfun=mean_squared_error)
        clf.compute_accuracy = False
        if chainer.cuda.available and args.gpu >= 0:
            clf.to_gpu(gpu_id)
        optimizer = optimizers.MomentumSGD(lr=0.1)
        optimizer.setup(clf)
        updater = training.StandardUpdater(train_iter, optimizer, device=gpu_id)
        trainer = training.Trainer(updater, (50000, "iteration"),
                                   out="mnist_result")
        trainer.extend(extensions.LogReport())
        trainer.extend(extensions.PrintReport(
            ['iteration', 'main/loss', 'elapsed_time']))
        trainer.extend(ChangeLearningRate(), trigger=(20000, "iteration"))
        trainer.run()
        train = dae.encode(train).data

    # Fine-tuning
    print("fine tuning")
    with chainer.using_config("train", False):
        train, _ = mnist.get_mnist()
        train, _ = convert.concat_examples(train, device=gpu_id)
    train_tuple = tuple_dataset.TupleDataset(train, train)
    train_iter = iterators.SerialIterator(train_tuple, batchsize)
    model = L.Classifier(model, lossfun=mean_squared_error)
    model.compute_accuracy = False
    if chainer.cuda.available and args.gpu >= 0:
        model.to_gpu(gpu_id)
    optimizer = optimizers.MomentumSGD(lr=0.1)
    optimizer.setup(model)
    updater = training.StandardUpdater(train_iter, optimizer, device=gpu_id)
    trainer = training.Trainer(updater, (100000, "iteration"),
                               out="mnist_result")
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['iteration', 'main/loss', 'elapsed_time']))
    trainer.extend(ChangeLearningRate(), trigger=(20000, "iteration"))
    trainer.run()

    outfile = "StackedDenoisingAutoEncoder-seed{}.model".format(seed)
    serializers.save_npz(outfile, model.predictor)
def main():
    parser = argparse.ArgumentParser(description='Train stargan voice convertor')
    parser.add_argument('--gpu', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument("--train_data", type=Path, required=True,
                        help="training data")
    parser.add_argument("--speaker_id", type=Path, required=True,
                        help="speaker_id file")
    parser.add_argument("--output_file", type=Path, required=True)
    parser.add_argument('--epoch', default=6000, type=int,
                        help='number of epochs to learn')
    parser.add_argument("--epoch_start", type=int, default=0)
    parser.add_argument('--snapshot', default=100, type=int,
                        help='interval of snapshot')
    parser.add_argument('--batchsize', type=int, default=4, help='Batch size')
    parser.add_argument('--optimizer', default='Adam',
                        choices=["Adam", "MomentumSGD", "RMSprop"], type=str,
                        help='optimizer to use: Adam, MomentumSGD, RMSprop')
    parser.add_argument('--lrate', default=0.00001, type=float,
                        help='learning rate for Adam, MomentumSGD or RMSprop')
    parser.add_argument('--genpath', type=str,
                        help='path for a pretrained generator')
    parser.add_argument('--clspath', type=str,
                        help='path for a pretrained classifier')
    parser.add_argument('--advdispath', type=str,
                        help='path for a pretrained real/fake discriminator')
    args = parser.parse_args()

    epsi = sys.float_info.epsilon

    output_file = args.output_file
    output_dir = output_file.with_suffix("")
    output_dir.mkdir(exist_ok=True, parents=True)

    all_source = np.load(args.train_data)
    Speakers, SpeakerIndividualKeys = separate_speaker(np.load(args.speaker_id))
    NormalizedAllData = get_separated_values(all_source, SpeakerIndividualKeys)
    SpeakerNum = len(Speakers)

    # Set input directories
    EpochNum = args.epoch
    BatchSize = args.batchsize
    SentenceNum = [len(SpeakerIndividualKeys[s]) for s in range(SpeakerNum)]
    MaxSentenceNum = max(SentenceNum)

    print('#GPU: {}'.format(args.gpu))
    print('#epoch: {}'.format(EpochNum))
    print('Optimizer: {}'.format(args.optimizer))
    print('Learning rate: {}'.format(args.lrate))
    print('Snapshot: {}'.format(args.snapshot))

    # Set up model
    num_mels = 36  # num_mels = data.shape[0] (36 dim)
    zdim = 5       # zdim = 8
    hdim = 32
    cdim = 8
    adim = 32

    generator_class = net.Generator1
    classifier_class = net.Classifier1
    discriminator_class = net.AdvDiscriminator1
    loss_class = net.Loss1

    generator = generator_class(SpeakerNum)
    classifier = classifier_class(num_mels, SpeakerNum, cdim)
    adverserial_discriminator = discriminator_class(num_mels, SpeakerNum, adim)
    # adverserial_discriminator = net.AdvDiscriminator_noactive(num_mels, SpeakerNum, adim)
    # paranum = sum(p.data.size for p in generator.params())
    # print('Parameter #: {}'.format(paranum))

    if args.genpath is not None:
        try:
            serializers.load_npz(args.genpath, generator)
        except Exception:
            print('Could not load generator.')
    if args.clspath is not None:
        try:
            serializers.load_npz(args.clspath, classifier)
        except Exception:
            print('Could not load domain classifier.')
    if args.advdispath is not None:
        try:
            serializers.load_npz(args.advdispath, adverserial_discriminator)
        except Exception:
            print('Could not load real/fake discriminator.')

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        generator.to_gpu()
        classifier.to_gpu()
        adverserial_discriminator.to_gpu()
    xp = np if args.gpu < 0 else cuda.cupy

    # Set up the loss and optimizers
    # loss = net.Loss1(generator, classifier, adverserial_discriminator)
    loss = loss_class(generator, classifier, adverserial_discriminator)
    w_adv = 1.0
    w_cls = 1.0
    w_cyc = 1.0
    w_rec = 1.0

    if args.optimizer == 'MomentumSGD':
        opt_gen = optimizers.MomentumSGD(lr=args.lrate, momentum=0.9)
        opt_cls = optimizers.MomentumSGD(lr=args.lrate, momentum=0.9)
        opt_advdis = optimizers.MomentumSGD(lr=args.lrate, momentum=0.9)
    elif args.optimizer == 'Adam':
        opt_gen = optimizers.Adam(alpha=0.001, beta1=0.9)
        opt_cls = optimizers.Adam(alpha=0.00005, beta1=0.5)
        opt_advdis = optimizers.Adam(alpha=0.00001, beta1=0.5)
    elif args.optimizer == 'RMSprop':
        opt_gen = optimizers.RMSprop(lr=args.lrate)
        opt_cls = optimizers.RMSprop(lr=args.lrate)
        opt_advdis = optimizers.RMSprop(lr=args.lrate)
    opt_gen.setup(generator)
    opt_cls.setup(classifier)
    opt_advdis.setup(adverserial_discriminator)

    AllCombinationPairs = list(itertools.combinations(range(SpeakerNum), 2))

    # train
    for epoch in trange(args.epoch_start, EpochNum + 1):
        # shuffled_indexes[speaker_idx][idx] indexes into NormalizedAllData[speaker_idx]
        shuffled_indexes = [myperm(SentenceNum[s], MaxSentenceNum)
                            for s in range(SpeakerNum)]
        for n in range(MaxSentenceNum // BatchSize):
            # batchlist_mcep[speaker_idx][sentence_idx_in_batch]
            batchlist_mcep = []
            begin_idx = n * BatchSize
            end_idx = begin_idx + BatchSize  # end_idx is exclusive
            for s in range(SpeakerNum):
                batch_tmp = []
                for idx in shuffled_indexes[s][begin_idx:end_idx]:
                    batch_tmp.append(NormalizedAllData[s][idx].T)  # transpose here!!
                batchlist_mcep.append(batch_tmp)

            # Convert batchlist into a list of arrays
            X = [batchlist2array(batchlist) for batchlist in batchlist_mcep]
            xin = [chainer.Variable(xp.asarray(Xs, dtype=np.float32)) for Xs in X]

            # Iterate through all speaker pairs
            random.shuffle(AllCombinationPairs)
            for s0, s1 in AllCombinationPairs:
                AdvLoss_d, AdvLoss_g, ClsLoss_r, ClsLoss_f, CycLoss, RecLoss \
                    = loss.calc_loss(xin[s0], xin[s1], s0, s1, SpeakerNum)
                gen_loss = (w_adv * AdvLoss_g + w_cls * ClsLoss_f
                            + w_cyc * CycLoss + w_rec * RecLoss)
                cls_loss = ClsLoss_r
                advdis_loss = AdvLoss_d

                generator.cleargrads()
                gen_loss.backward()
                opt_gen.update()

                classifier.cleargrads()
                cls_loss.backward()
                opt_cls.update()

                adverserial_discriminator.cleargrads()
                advdis_loss.backward()
                opt_advdis.update()

            print('epoch {}, mini-batch {}:'.format(epoch, n + 1))
            print('AdvLoss_d={}, AdvLoss_g={}, ClsLoss_r={}, ClsLoss_f={}'
                  .format(AdvLoss_d.data, AdvLoss_g.data,
                          ClsLoss_r.data, ClsLoss_f.data))
            print('CycLoss={}, RecLoss={}'.format(CycLoss.data, RecLoss.data))
            save_loss(output_dir, AdvLoss_d.data, AdvLoss_g.data,
                      ClsLoss_r.data, ClsLoss_f.data,
                      CycLoss.data, RecLoss.data)

        if epoch % args.snapshot == 0:
            snapshot_dir = output_dir / "snapshot"
            snapshot_dir.mkdir(exist_ok=True)
            snapshot(snapshot_dir, epoch, generator, classifier,
                     adverserial_discriminator)

            snapshot_feature_dir = output_dir / "snapshot_feature"
            snapshot_feature_dir.mkdir(exist_ok=True)
            output = {}
            with chainer.no_backprop_mode():
                for s in range(SpeakerNum):
                    for key, mcep in zip(SpeakerIndividualKeys[s],
                                         NormalizedAllData[s]):
                        mcep_T = mcep.T
                        out = generator.hidden_layer(chainer.Variable(
                            xp.asarray(mcep_T[np.newaxis, :, :],
                                       dtype=np.float32)))
                        out = np.squeeze(cuda.to_cpu(out.data))
                        output[key] = out.T
            np.savez(snapshot_feature_dir /
                     f"{output_file.stem}_epoch_{epoch:05}.npz", **output)

    # output final result
    output = {}
    with chainer.no_backprop_mode():
        for s in range(SpeakerNum):
            for key, mcep in zip(SpeakerIndividualKeys[s], NormalizedAllData[s]):
                mcep_T = mcep.T
                out = generator.hidden_layer(chainer.Variable(
                    xp.asarray(mcep_T[np.newaxis, :, :], dtype=np.float32)))
                out = np.squeeze(cuda.to_cpu(out.data))
                output[key] = out.T
    np.savez(output_file, **output)
    cifar_net = net.IdentityMapping(args.res_depth, swapout=args.swapout,
                                    skip=args.skip_depth)
elif args.model == 'vgg_no_fc':
    cifar_net = net.VGGNoFC()
elif args.model == 'vgg_wide':
    cifar_net = net.VGGWide()
elif args.model == 'vgg_crelu':
    cifar_net = net.VGGCReLU()
elif args.model == 'inception':
    cifar_net = net.Inception()
elif args.model == 'pyramid':
    cifar_net = net.PyramidNet(args.res_depth)
else:
    cifar_net = net.VGG()

if args.optimizer == 'sgd':
    optimizer = optimizers.MomentumSGD(lr=args.lr)
else:
    optimizer = optimizers.Adam(alpha=args.alpha)
optimizer.setup(cifar_net)
if args.weight_decay > 0:
    optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))
cifar_trainer = trainer.CifarTrainer(cifar_net, optimizer, args.iter,
                                     args.batch_size, args.gpu)

if args.prefix is None:
    model_prefix = '{}_{}'.format(args.model, args.optimizer)
else:
    model_prefix = args.prefix

state = {'best_valid_error': 100, 'best_test_error': 100,
         'clock': time.clock()}

def on_epoch_done(epoch, n, o, loss, acc, valid_loss, valid_acc,
                  test_loss, test_acc):
    error = 100 * (1 - acc)
    valid_error = 100 * (1 - valid_acc)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--batchsize', type=int, default=12)
    parser.add_argument('--class_weight', type=str, default='class_weight.npy')
    parser.add_argument('--out', type=str, default='result')
    args = parser.parse_args()

    # Triggers
    log_trigger = (50, 'iteration')
    validation_trigger = (2000, 'iteration')
    end_trigger = (16000, 'iteration')

    # Dataset
    train = CamVidDataset(split='train')

    def transform(in_data):
        img, label = in_data
        if np.random.rand() > 0.5:
            img = img[:, :, ::-1]
            label = label[:, ::-1]
        return img, label

    train = TransformDataset(train, transform)
    val = CamVidDataset(split='val')

    # Iterator
    train_iter = iterators.MultiprocessIterator(train, args.batchsize)
    val_iter = iterators.MultiprocessIterator(val, args.batchsize,
                                              shuffle=False, repeat=False)

    # Model
    class_weight = np.load(args.class_weight)
    model = SegNetBasic(n_class=11)
    model = PixelwiseSoftmaxClassifier(model, class_weight=class_weight)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    # Optimizer
    optimizer = optimizers.MomentumSGD(lr=0.1, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    # Updater
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)

    # Trainer
    trainer = training.Trainer(updater, end_trigger, out=args.out)
    trainer.extend(extensions.LogReport(trigger=log_trigger))
    trainer.extend(extensions.observe_lr(), trigger=log_trigger)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(TestModeEvaluator(val_iter, model, device=args.gpu),
                   trigger=validation_trigger)

    if extensions.PlotReport.available():
        trainer.extend(extensions.PlotReport(
            ['main/loss', 'validation/main/loss'],
            x_key='iteration', file_name='loss.png'))

    trainer.extend(extensions.snapshot_object(
        model.predictor, filename='model_iteration-{.updater.iteration}'),
        trigger=end_trigger)
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'elapsed_time', 'lr',
         'main/loss', 'validation/main/loss']), trigger=log_trigger)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
N = data.num_train
N_test = data.num_test
print('- number of training data: %d' % N)
print('- number of test data: %d' % N_test)
print('- number of labels: %d' % len(caltech['label_names']))
print('done.')

# prepare network
model = net.Classifier(net.AlexNet())

# initialize optimizer
if args.optimizer == 'adam':
    optimizer = optimizers.Adam(args.alpha)
elif args.optimizer == 'momentumsgd':
    optimizer = optimizers.MomentumSGD(args.learningrate)
optimizer.setup(model)

# training loop
print()
print('start learning')
if args.gpu >= 0:
    model.to_gpu()

for epoch in range(0, n_epoch):
    print('epoch', epoch + 1)

    perm = np.random.permutation(N)
    permed_data = np.asarray(x_train[perm])
    permed_target = xp.asarray(y_train[perm])

    sum_accuracy = 0
def __init__(self, network, thresholdExits=None, percentTestExits=.9,
             percentTrainKeeps=1., lr=0.1, momentum=0.9, weight_decay=0.0001,
             alpha=0.001, opt="Adam", joint=True, verbose=False):
    self.opt = opt
    self.lr = lr
    self.alpha = alpha
    self.momentum = momentum
    self.weight_decay = weight_decay
    self.joint = joint
    self.forwardMain = None

    self.main = Net()
    self.models = []
    starti = 0
    curri = 0
    for link in network:
        if not isinstance(link, Branch):
            curri += 1
            self.main.add_link(link)
        else:
            net = Net(link.weight)
            net.starti = starti
            starti = curri
            net.endi = curri
            for prevlink in self.main:
                newlink = copy.deepcopy(prevlink)
                newlink.name = None
                net.add_link(newlink)
            for branchlink in link:
                newlink = copy.deepcopy(branchlink)
                newlink.name = None
                net.add_link(newlink)
            self.models.append(net)

    for branchlink in link:
        newlink = copy.deepcopy(branchlink)
        newlink.name = None
        self.main.add_link(newlink)

    if self.opt == 'MomentumSGD':
        self.optimizer = optimizers.MomentumSGD(lr=self.lr,
                                                momentum=self.momentum)
    else:
        self.optimizer = optimizers.Adam(alpha=self.alpha)
    self.optimizer.setup(self.main)
    if self.opt == 'MomentumSGD':
        self.optimizer.add_hook(
            chainer.optimizer.WeightDecay(self.weight_decay))

    self.optimizers = []
    for model in self.models:
        if self.opt == 'MomentumSGD':
            optimizer = optimizers.MomentumSGD(lr=self.lr, momentum=0.9)
        else:
            optimizer = optimizers.Adam()
        optimizer.setup(model)
        if self.opt == 'MomentumSGD':
            optimizer.add_hook(
                chainer.optimizer.WeightDecay(self.weight_decay))
        self.optimizers.append(optimizer)

    self.percentTrainKeeps = percentTrainKeeps
    self.percentTestExits = percentTestExits
    self.thresholdExits = thresholdExits
    self.clearLearnedExitsThresholds()

    self.verbose = verbose
    self.gpu = False
    self.xp = np
train_dataset = VOC('train')
valid_dataset = VOC('val')
train_iter = iterators.SerialIterator(train_dataset, batchsize)

model = FasterRCNN()
chainer.serializers.load_npz('train_rpn/snapshot_571000', model)
model.to_gpu(0)
warmup(model, train_iter)
model.rcnn_train = True

# optimizer = optimizers.Adam()
# optimizer.setup(model)
optimizer = optimizers.MomentumSGD(lr=0.001)
optimizer.setup(model)
optimizer.add_hook(chainer.optimizer.WeightDecay(0.0005))

updater = training.StandardUpdater(train_iter, optimizer, device=0)
trainer = training.Trainer(updater, (100, 'epoch'), out='train_rcnn')
trainer.extend(extensions.LogReport(trigger=(100, 'iteration')))
trainer.extend(extensions.PrintReport(
    ['epoch', 'iteration', 'main/loss_cls', 'main/cls_accuracy',
     'main/loss_bbox', 'main/loss_rcnn', 'elapsed_time']),
def main():
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epochs', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--output_dir', '-o', default='./outputs',
                        help='Directory to output the result')
    parser.add_argument('--gpu_id', '-g', default=0,
                        help='ID of the GPU to be used. Set to -1 if you use CPU')
    args = parser.parse_args()

    # Download the MNIST data if you haven't downloaded it yet
    train, test = datasets.mnist.get_mnist(withlabel=True, ndim=1)

    gpu_id = args.gpu_id
    batchsize = args.batchsize
    epochs = args.epochs
    run.log('Batch size', np.int(batchsize))
    run.log('Epochs', np.int(epochs))

    train_iter = iterators.SerialIterator(train, batchsize)
    test_iter = iterators.SerialIterator(test, batchsize,
                                         repeat=False, shuffle=False)

    model = MyNetwork()

    if gpu_id >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(0).use()
        model.to_gpu()  # Copy the model to the GPU

    # Choose an optimizer algorithm
    optimizer = optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    # Give the optimizer a reference to the model so that it
    # can locate the model's parameters.
    optimizer.setup(model)

    while train_iter.epoch < epochs:
        # ---------- One iteration of the training loop ----------
        train_batch = train_iter.next()
        image_train, target_train = concat_examples(train_batch, gpu_id)

        # Calculate the prediction of the network
        prediction_train = model(image_train)

        # Calculate the loss with softmax_cross_entropy
        loss = F.softmax_cross_entropy(prediction_train, target_train)

        # Calculate the gradients in the network
        model.cleargrads()
        loss.backward()

        # Update all the trainable parameters
        optimizer.update()
        # --------------------- until here ---------------------

        # Check the validation accuracy of prediction after every epoch
        if train_iter.is_new_epoch:
            # Display the training loss
            print('epoch:{:02d} train_loss:{:.04f} '.format(
                train_iter.epoch, float(to_cpu(loss.array))), end='')

            test_losses = []
            test_accuracies = []
            while True:
                test_batch = test_iter.next()
                image_test, target_test = concat_examples(test_batch, gpu_id)

                # Forward the test data
                prediction_test = model(image_test)

                # Calculate the loss
                loss_test = F.softmax_cross_entropy(prediction_test,
                                                    target_test)
                test_losses.append(to_cpu(loss_test.array))

                # Calculate the accuracy
                accuracy = F.accuracy(prediction_test, target_test)
                accuracy.to_cpu()
                test_accuracies.append(accuracy.array)

                if test_iter.is_new_epoch:
                    test_iter.epoch = 0
                    test_iter.current_position = 0
                    test_iter.is_new_epoch = False
                    test_iter._pushed_position = None
                    break

            val_accuracy = np.mean(test_accuracies)
            print('val_loss:{:.04f} val_accuracy:{:.04f}'.format(
                np.mean(test_losses), val_accuracy))
            run.log("Accuracy", np.float(val_accuracy))
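The loop above rewinds test_iter by poking at its internal attributes (including the private _pushed_position). SerialIterator has a public reset() method that achieves the same thing for iterators created with repeat=False; a minimal equivalent of the epoch-end branch:

# Sketch: the same epoch-end rewind using the public iterator API.
if test_iter.is_new_epoch:
    test_iter.reset()  # rewinds position and clears the new-epoch flag
    break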
def train(view, set):
    # Load the 5-fold split data
    gallery1, probe1 = load_res_invariant_dataset(view=view, set=set,
                                                  type='SFDEI', frame='1')
    gallery2, probe2 = load_res_invariant_dataset(view=view, set=set,
                                                  type='SFDEI', frame='2')

    # TensorBoard writer
    writer = SummaryWriter()

    # Training settings
    save_dir = '/home/common-ns/PycharmProjects/Multiscale_SFDEINet/4inputs/models' \
               + view + '/set_' + str(set)
    # save_dir = '/home/common-ns/setoguchi/chainer_files/Two_by_two_in/SRGAN1-2/' + view + '/set_' + str(set)
    os.mkdir(save_dir)
    batch_size = 239
    max_iteration = 50000
    id = 1
    # gpu_id = chainer.backends.cuda.get_device_from_id(id)
    model = two_by_two_in()
    model.to_gpu(id)
    optimizer = optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer.setup(model)
    # optimizer.add_hook(chainer.optimizer.WeightDecay(0.0005))

    iteration = 1.0
    i = 0
    # Training loop
    # for i in range(1, epoch + 1):
    while iteration < max_iteration + 1:
        accum_loss = 0.0
        count = 0
        # Build positive/negative pairs so that at least 10% of the data is
        # positive. To avoid bias, pick each pair type with probability 1/4.
        select_seed = np.random.randint(0, 4)
        if select_seed == 0:
            train_data = create_pair(gallery1, gallery2, probe1, probe2)
        elif select_seed == 1:
            train_data = create_pair(probe1, probe2, gallery1, gallery2)
        elif select_seed == 2:
            train_data = create_pair(gallery1, gallery2, gallery1, gallery2)
        elif select_seed == 3:
            train_data = create_pair(probe1, probe2, probe1, probe2)

        # Shuffle before building batches
        shuffle = random.sample(train_data, len(train_data))
        # Split into mini-batches
        mini_batches = make_batch_list(shuffle, batch_size)
        for batch in mini_batches:
            # Turn the list into single numpy batches
            pos1, pos2, neg1, neg2, signal = list2npy(batch, id)
            # Forward
            g_out, p_out = model(pos1, pos2, neg1, neg2)
            # Loss: flatten to match the contrastive loss interface
            signal = F.flatten(signal)
            loss = F.contrastive(g_out, p_out, signal, margin=3, reduce='mean')
            # print(cuda.to_cpu(loss.data))
            accum_loss += cuda.to_cpu(loss.data)
            # Backward
            model.cleargrads()
            loss.backward()
            # Parameter update
            optimizer.update()

            if iteration % 10000 == 0:
                optimizer.lr = optimizer.lr * 0.1
                print('Learning rate decayed to: {}'.format(optimizer.lr))
            if iteration % 5000 == 0:
                serializers.save_npz(
                    save_dir + '/model_snapshot_{}'.format(int(iteration)),
                    model)
            iteration += 1.0
        i += 1
        print('epoch:{}'.format(i), 'iteration:{}'.format(int(iteration)),
              'accum_loss:{}'.format(accum_loss / float(len(mini_batches))))
        writer.add_scalar('train/loss',
                          accum_loss / float(len(mini_batches)), i)
        # Save the model roughly every 10000 iterations
        # if iteration % 10000 == 0:
        #     serializers.save_npz(save_dir + '/model_snapshot_{}'.format(i), model)

    g = c.build_computational_graph(g_out[0])
    with open(save_dir + '/graph.dot', 'w') as o:
        o.write(g.dump())
def main():
    print("\n---------------- argument check ------------------")
    if len(sys.argv) != 2:
        print("Invalid argument. ################################ "
              + str(len(sys.argv)))
        exit(1)
    dataset_path = sys.argv[1]

    print("\n------------------ preprocess --------------------")
    proj_root = os.path.dirname(os.path.dirname(os.path.dirname(root_path)))
    config_path = os.path.join(proj_root, "data", "config.json")
    layer_path = os.path.join(proj_root, "data", "layer_settings.json")

    # Load the config file
    try:
        with open(config_path, "r") as f:
            config = json.load(f)
        print("loaded config file")
    except IOError:
        print("config.json load error.")
        sys.exit(1)

    # Load the layer file
    try:
        with open(layer_path, "r") as f:
            layer_data = json.load(f)
        print("loaded layer info file")
    except IOError:
        print("layer_settings.json load error.")
        sys.exit(1)

    in_w = layer_data["nodes"][0]["settings"]["width"]
    in_h = layer_data["nodes"][0]["settings"]["height"]
    in_c = layer_data["nodes"][0]["settings"]["channel"]

    # Load the training / test data
    (x_train, t_train), (x_test, t_test) = \
        load_data(dataset_path, input_width=in_w, input_height=in_h,
                  input_channel=in_c, test_size=0.1, flatten=False)
    # (x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)
    print("loaded training data")

    print("\n---------------- hyper parameter -----------------")
    train_size = len(t_train)
    test_size = len(t_test)
    batch_size = config["batch"]
    epoch = config["epoch"]
    iter_per_epoch = int(train_size / batch_size)
    max_iter = epoch * iter_per_epoch
    opt_name = config["optimizer"]
    lr = config["lr"]
    if opt_name == "Adagrad":
        optimizer = optimizers.AdaGrad(lr=lr)
    elif opt_name == "Adam":
        optimizer = optimizers.Adam()
    elif opt_name == "MomentumSGD":  # config key was misspelled "MonmentumSGD"
        optimizer = optimizers.MomentumSGD(lr=lr, momentum=0.9)  # default lr = 0.01
    elif opt_name == "Adadelta":
        optimizer = optimizers.AdaDelta()
    else:
        print("unsupported optimizer type: " + opt_name)
        sys.exit(2)

    print("training size : " + str(train_size))
    print("test size     : " + str(test_size))
    print("batch size    : " + str(batch_size))
    print("epoch size    : " + str(epoch))
    print("optimizer type: " + opt_name)
    print("learning rate : " + str(lr))
    print("input width   : " + str(in_w))
    print("input height  : " + str(in_h))

    print("\n------------ CNN layer configuration --------------")
    model = CNN(layer_data["layer_info"])
    optimizer.use_cleargrads()
    optimizer.setup(model)

    print("\n------------ training start --------------")
    for e in range(0, epoch):
        print("epoch[" + str(e) + "/" + str(epoch) + "]")
        # interval_start = time.clock()
        sum_loss = 0
        sum_accuracy = 0

        # ------------ Train on the training data and collect results ---------
        train_stroke_size = int(train_size / batch_size)
        for i in tqdm(range(0, train_stroke_size)):
            # for i in tqdm(range(max_iter)):
            # Sample one batch
            batch_mask = np.random.choice(train_size, batch_size)
            X = x_train[batch_mask]
            # Reshape to 3-D (channel, width, height) for the convolutions
            X = np.array(X).astype(np.float32).reshape(len(X), in_c, in_w, in_h)
            # Inputs must be np.float32, targets np.int32
            x_batch = chainer.Variable(np.array(X).astype(np.float32))
            t_batch = np.array(t_train[batch_mask]).astype(np.int32)
            # Clear the gradients from the previous batch
            model.cleargrads()
            # Forward pass
            loss = model.forward(x_batch, t_batch, True)
            # Backward pass, then update towards lower loss
            loss.backward()
            optimizer.update()
            accuracy = float(model.accuracy.data) * len(t_batch.data)
            sum_loss += float(model.loss.data) * len(t_batch.data)
            sum_accuracy += accuracy
            del X
            gc.collect()

        # print("epoch : " + str(epoch) + " / " + str(n_epoch))
        print("train loss: %f" % (sum_loss / train_stroke_size))
        print("train accuracy: %f" % (sum_accuracy / train_stroke_size))
        # log = "accuracy:" + str(sum_accuracy/iter_per_epoch) + " loss:" + str(sum_loss/iter_per_epoch)
        sum_loss = 0
        sum_accuracy = 0

        # --------- Evaluate on the test data -----------
        test_stroke_size = int(test_size / batch_size)
        for i in tqdm(range(test_stroke_size)):
            batch_mask = np.random.choice(test_size, batch_size)
            X = x_test[batch_mask]
            X = np.array(X).astype(np.float32).reshape(len(X), in_c, in_w, in_h)
            x_batch = chainer.Variable(np.array(X).astype(np.float32))
            t_batch = np.array(t_test[batch_mask]).astype(np.int32)
            # No cleargrads/backward/update here: evaluation only
            loss = model.forward(x_batch, t_batch, False)
            sum_loss += float(model.loss.data) * len(t_batch.data)
            sum_accuracy += float(model.accuracy.data) * len(t_batch.data)
            # print("accuracy:" + str(accuracy) + " loss:" + str(sum_loss))
            del X
            gc.collect()

        print("test loss: %f" % (sum_loss / test_stroke_size))
        print("test accuracy: %f" % (sum_accuracy / test_stroke_size))

    # with open(os.path.join(root_path, "memory", "train.txt"), "w") as f:
    #     f.write(log)
    model_path = os.path.join(root_path, "memory", "model.pkl")
    pickle.dump(model, open(model_path, "wb"), -1)
x, t = load_iris(return_X_y=True)
x = x.astype('float32')
t = t.astype('int32')

dataset = TupleDataset(x, t)
train_val, test = split_dataset_random(dataset, int(len(dataset) * 0.7), seed=0)
train, valid = split_dataset_random(train_val, int(len(train_val) * 0.7), seed=0)
train_iter = SerialIterator(train, batch_size=4, repeat=True, shuffle=True)

optimizer = optimizers.MomentumSGD(lr=0.001, momentum=0.9)
optimizer.setup(net)

for param in net.params():
    if param.name != 'b':  # everything except the biases
        param.update_rule.add_hook(WeightDecay(0.0001))  # apply weight decay

n_batch = 64  # batch size
n_epoch = 50  # number of epochs

# Logs
results_train, results_valid = {}, {}
results_train['loss'], results_train['accuracy'] = [], []
results_valid['loss'], results_valid['accuracy'] = [], []

train_iter.reset()  # next() has already been called once above
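The setup above stops just before the loop itself. A hedged sketch of the manual epoch loop that typically consumes these pieces; net, n_epoch, train_iter, optimizer, and results_train come from the snippet, while the concat_examples call and the loss/accuracy bookkeeping are assumptions about the omitted continuation:

# Hedged sketch of the training loop this setup leads into.
from chainer.dataset import concat_examples
import chainer.functions as F

for epoch in range(n_epoch):
    while True:
        batch = train_iter.next()
        x_batch, t_batch = concat_examples(batch)  # assumption: tuples unpacked this way
        y = net(x_batch)
        loss = F.softmax_cross_entropy(y, t_batch)
        net.cleargrads()
        loss.backward()
        optimizer.update()
        if train_iter.is_new_epoch:
            break
    # record the last mini-batch statistics of the epoch
    results_train['loss'].append(float(loss.array))
    results_train['accuracy'].append(float(F.accuracy(y, t_batch).array))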
def training(self, iterators):
    train_iter, val_iter = iterators
    if self.opt_method == 'Adam':
        # opt_nsn = optimizers.Adam(alpha=0.05767827010227712, beta1=0.9687170166672859,
        #                           beta2=0.9918705323205452, eps=0.03260658847351856)
        opt_nsn = optimizers.Adam()
        opt_nsn.setup(self.model)
        # opt_nsn.add_hook(chainer.optimizer.WeightDecay(0.00000416029939))
        opt_nsn.add_hook(chainer.optimizer.WeightDecay(0.001))
    elif self.opt_method == 'SGD':
        opt_nsn = optimizers.SGD(lr=1.0)
        # opt_nsn = optimizers.SGD(lr=0.01)
        opt_nsn.setup(self.model)
        # opt_nsn.add_hook(chainer.optimizer.WeightDecay(0.001))
        opt_nsn.add_hook(chainer.optimizer.WeightDecay(0.00009))
    elif self.opt_method == 'MomentumSGD':
        opt_nsn = optimizers.MomentumSGD(lr=0.0001, momentum=0.99)
        opt_nsn.setup(self.model)
        # opt_nsn.add_hook(chainer.optimizer.WeightDecay(0.00001))

    train_eval, test_eval = {}, {}
    train_eval['loss'], test_eval['loss'] = [], []
    for cri in self.criteria:
        train_eval[cri] = []
        test_eval[cri] = []

    N_train = train_iter.dataset.__len__()
    N_test = val_iter.dataset.__len__()
    with open(self.opbase + '/result.txt', 'w') as f:
        f.write('N_train: {}\n'.format(N_train))
        f.write('N_test: {}\n'.format(N_test))

    bestAccuracy, bestRecall, bestPrecision = 0, 0, 0
    bestSpecificity, bestFmeasure, bestIoU = 0, 0, 0
    bestEpoch = 0
    for epoch in range(1, self.epoch + 1):
        print('[epoch {}]'.format(epoch))
        traeval, train_sum_loss = self._trainer(train_iter, opt_nsn, epoch=epoch)
        train_eval['loss'].append(train_sum_loss / (N_train * self.batchsize))
        if epoch % self.val_iteration == 0:
            teseval, test_sum_loss = self._validater(val_iter, epoch=epoch)
            test_eval['loss'].append(test_sum_loss / (N_test * self.batchsize))
        else:
            teseval = {}
            for cri in self.criteria:
                teseval[cri] = 0
            test_eval['loss'].append(0)
            test_sum_loss = 0
        for cri in self.criteria:
            train_eval[cri].append(traeval[cri])
            test_eval[cri].append(teseval[cri])

        print('train mean loss={}'.format(
            train_sum_loss / (N_train * self.batchsize)))
        print('train accuracy={}, train recall={}'.format(
            traeval['Accuracy'], traeval['Recall']))
        print('train precision={}, specificity={}'.format(
            traeval['Precision'], traeval['Specificity']))
        print('train F-measure={}, IoU={}'.format(traeval['F-measure'],
                                                  traeval['IoU']))
        print('test mean loss={}'.format(
            test_sum_loss / (N_test * self.batchsize)))
        print('test accuracy={}, recall={}'.format(teseval['Accuracy'],
                                                   teseval['Recall']))
        print('test precision={}, specificity={}'.format(
            teseval['Precision'], teseval['Specificity']))
        print('test F-measure={}, IoU={}'.format(teseval['F-measure'],
                                                 teseval['IoU']))

        with open(self.opbase + '/result.txt', 'a') as f:
            f.write('========================================\n')
            f.write('[epoch' + str(epoch) + ']\n')
            f.write('train mean loss={}\n'.format(
                train_sum_loss / (N_train * self.batchsize)))
            f.write('train accuracy={}, train recall={}\n'.format(
                traeval['Accuracy'], traeval['Recall']))
            f.write('train precision={}, specificity={}\n'.format(
                traeval['Precision'], traeval['Specificity']))
            f.write('train F-measure={}, IoU={}\n'.format(
                traeval['F-measure'], traeval['IoU']))
            if epoch % self.val_iteration == 0:
                f.write('validation mean loss={}\n'.format(
                    test_sum_loss / (N_test * self.batchsize)))
                f.write('validation accuracy={}, recall={}\n'.format(
                    teseval['Accuracy'], teseval['Recall']))
                f.write('validation precision={}, specificity={}\n'.format(
                    teseval['Precision'], teseval['Specificity']))
                f.write('validation F-measure={}, IoU={}\n'.format(
                    teseval['F-measure'], teseval['IoU']))

        with open(self.opbase + '/TrainResult.csv', 'a') as f:
            c = csv.writer(f)
            c.writerow([epoch, traeval['Accuracy'], traeval['Recall'],
                        traeval['Precision'], traeval['Specificity'],
                        traeval['F-measure'], traeval['IoU']])
        with open(self.opbase + '/ValResult.csv', 'a') as f:
            c = csv.writer(f)
            c.writerow([epoch, teseval['Accuracy'], teseval['Recall'],
                        teseval['Precision'], teseval['Specificity'],
                        teseval['F-measure'], teseval['IoU']])

        if epoch == 1:
            pastLoss = train_sum_loss
        if train_sum_loss > pastLoss and self.opt_method == 'SGD':
            # Note: the factor 1.0 leaves the learning rate unchanged;
            # a value below 1.0 would actually decay it.
            learning_rate = opt_nsn.lr * 1.0
            opt_nsn = optimizers.SGD(learning_rate)
            opt_nsn.setup(self.model)
            with open(self.opbase + '/result.txt', 'a') as f:
                f.write('lr: {}\n'.format(opt_nsn.lr))
        pastLoss = train_sum_loss

        if bestAccuracy <= teseval['Accuracy']:
            bestAccuracy = teseval['Accuracy']
        if bestRecall <= teseval['Recall']:
            bestRecall = teseval['Recall']
            # Save Model
            # model_name = 'NSN_Recall_p' + str(self.patchsize) + '.npz'
            # serializers.save_npz(self.opbase + '/' + model_name, self.model)
        if bestPrecision <= teseval['Precision']:
            bestPrecision = teseval['Precision']
            # Save Model
            # model_name = 'NSN_Precision_p' + str(self.patchsize) + '.npz'
            # serializers.save_npz(self.opbase + '/' + model_name, self.model)
        if bestSpecificity <= teseval['Specificity']:
            bestSpecificity = teseval['Specificity']
        if bestFmeasure <= teseval['F-measure']:
            bestFmeasure = teseval['F-measure']
        if bestIoU <= teseval['IoU']:
            bestIoU = teseval['IoU']
            bestEpoch = epoch
            # Save Model
            if epoch > 0:
                model_name = 'NSN_IoU_p' + str(self.patchsize) + '.npz'
                serializers.save_npz(self.opbase + '/' + model_name, self.model)
            else:
                bestIoU = 0.0

    bestScore = [bestAccuracy, bestRecall, bestPrecision,
                 bestSpecificity, bestFmeasure, bestIoU]
    print('========================================')
    print('Best Epoch       : ' + str(bestEpoch))
    print('Best Accuracy    : ' + str(bestAccuracy))
    print('Best Recall      : ' + str(bestRecall))
    print('Best Precision   : ' + str(bestPrecision))
    print('Best Specificity : ' + str(bestSpecificity))
    print('Best F-measure   : ' + str(bestFmeasure))
    print('Best IoU         : ' + str(bestIoU))
    with open(self.opbase + '/result.txt', 'a') as f:
        f.write('################################################\n')
        f.write('BestAccuracy={}\n'.format(bestAccuracy))
        f.write('BestRecall={}, BestPrecision={}\n'.format(
            bestRecall, bestPrecision))
        f.write('BestSpecificity={}, BestFmesure={}\n'.format(
            bestSpecificity, bestFmeasure))
        f.write('BestIoU={}, BestEpoch={}\n'.format(bestIoU, bestEpoch))
        f.write('################################################\n')

    return train_eval, test_eval, bestScore
with open(label_file, "r") as f:
    labels = f.read().strip().split("\n")

# load model
print("loading model...")
model = Darknet19Predictor(Darknet19())
backup_file = "%s/401.model" % (backup_path)
if os.path.isfile(backup_file):
    serializers.load_hdf5(backup_file, model)  # load saved model
model.predictor.train = True
cuda.get_device(0).use()
model.to_gpu(0)  # for gpu

start = time.time()
optimizer = optimizers.MomentumSGD(lr=learning_rate, momentum=momentum)
optimizer.use_cleargrads()
optimizer.setup(model)
# freeze the weights
# model.conv1.disable_update()
optimizer.add_hook(chainer.optimizer.WeightDecay(weight_decay))

item_files = glob.glob(item_path + "/*")
x_debug = []
t_debug = []

# start to train
print("start training")
for batch in range(max_batches):
    model.predictor.train = True
def do_train(config_training):
    if config_training["training_management"]["disable_cudnn_softmax"]:
        import nmt_chainer.models.feedforward.multi_attention
        nmt_chainer.models.feedforward.multi_attention.disable_cudnn_softmax = True

    src_indexer, tgt_indexer = load_voc_and_update_training_config(
        config_training)

    save_prefix = config_training.training_management.save_prefix

    output_files_dict = {}
    output_files_dict["train_config"] = save_prefix + ".train.config"
    output_files_dict["model_ckpt"] = save_prefix + ".model." + "ckpt" + ".npz"
    output_files_dict["model_final"] = save_prefix + ".model." + "final" + ".npz"
    output_files_dict["model_best"] = save_prefix + ".model." + "best" + ".npz"
    output_files_dict["model_best_loss"] = save_prefix + ".model." + "best_loss" + ".npz"
    # output_files_dict["model_ckpt_config"] = save_prefix + ".model." + "ckpt" + ".config"
    # output_files_dict["model_final_config"] = save_prefix + ".model." + "final" + ".config"
    # output_files_dict["model_best_config"] = save_prefix + ".model." + "best" + ".config"
    # output_files_dict["model_best_loss_config"] = save_prefix + ".model." + "best_loss" + ".config"

    output_files_dict["test_translation_output"] = save_prefix + ".test.out"
    output_files_dict["test_src_output"] = save_prefix + ".test.src.out"
    output_files_dict["dev_translation_output"] = save_prefix + ".dev.out"
    output_files_dict["dev_src_output"] = save_prefix + ".dev.src.out"
    output_files_dict["valid_translation_output"] = save_prefix + ".valid.out"
    output_files_dict["valid_src_output"] = save_prefix + ".valid.src.out"
    output_files_dict["sqlite_db"] = save_prefix + ".result.sqlite"
    output_files_dict["optimizer_ckpt"] = save_prefix + ".optimizer." + "ckpt" + ".npz"
    output_files_dict["optimizer_final"] = save_prefix + ".optimizer." + "final" + ".npz"

    save_prefix_dir, save_prefix_fn = os.path.split(save_prefix)
    ensure_path(save_prefix_dir)

    already_existing_files = []
    for key_info, filename in output_files_dict.items():  # , valid_data_fn]:
        if os.path.exists(filename):
            already_existing_files.append(filename)
    if len(already_existing_files) > 0:
        print("Warning: existing files are going to be replaced / updated: ",
              already_existing_files)
        if not config_training.training_management.force_overwrite:
            input("Press Enter to Continue")

    save_train_config_fn = output_files_dict["train_config"]
    log.info("Saving training config to %s" % save_train_config_fn)
    config_training.save_to(save_train_config_fn)
    # json.dump(config_training, open(save_train_config_fn, "w"), indent=2, separators=(',', ': '))

    Vi = len(src_indexer)  # + UNK
    Vo = len(tgt_indexer)  # + UNK

    eos_idx = Vo

    data_fn = config_training.data.data_fn

    log.info("loading training data from %s" % data_fn)
    training_data_all = json.load(gzip.open(data_fn, "rb"))

    training_data = training_data_all["train"]
    log.info("loaded %i sentences as training data" % len(training_data))

    if "test" in training_data_all:
        test_data = training_data_all["test"]
        log.info("Found test data: %i sentences" % len(test_data))
    else:
        test_data = None
        log.info("No test data found")

    if "dev" in training_data_all:
        dev_data = training_data_all["dev"]
        log.info("Found dev data: %i sentences" % len(dev_data))
    else:
        dev_data = None
        log.info("No dev data found")

    if "valid" in training_data_all:
        valid_data = training_data_all["valid"]
        log.info("Found valid data: %i sentences" % len(valid_data))
    else:
        valid_data = None
        log.info("No valid data found")

    max_src_tgt_length = config_training.training_management.max_src_tgt_length
    if max_src_tgt_length is not None:
        log.info("filtering sentences of length larger than %i" %
                 (max_src_tgt_length))
        filtered_training_data = []
        nb_filtered = 0
        for src, tgt in training_data:
            if len(src) <= max_src_tgt_length and len(tgt) <= max_src_tgt_length:
                filtered_training_data.append((src, tgt))
            else:
                nb_filtered += 1
        log.info("filtered %i sentences of length larger than %i" %
                 (nb_filtered, max_src_tgt_length))
        training_data = filtered_training_data

    if not config_training.training.no_shuffle_of_training_data:
        log.info("shuffling")
        import random
        random.shuffle(training_data)
        log.info("done")

    encdec, _, _, _ = create_encdec_and_indexers_from_config_dict(
        config_training, src_indexer=src_indexer, tgt_indexer=tgt_indexer,
        load_config_model="if_exists"
        if config_training.training_management.resume else "no")

    if (config_training.training.get("load_initial_source_embeddings", None) is not None
            or config_training.training.get("load_initial_target_embeddings", None) is not None):
        src_emb = None
        tgt_emb = None
        src_emb_fn = config_training.training.get(
            "load_initial_source_embeddings", None)
        tgt_emb_fn = config_training.training.get(
            "load_initial_target_embeddings", None)
        if src_emb_fn is not None:
            log.info("loading source embeddings from %s", src_emb_fn)
            src_emb = np.load(src_emb_fn)
        if tgt_emb_fn is not None:
            log.info("loading target embeddings from %s", tgt_emb_fn)
            tgt_emb = np.load(tgt_emb_fn)
        encdec.initialize_embeddings(src_emb, tgt_emb,
                                     no_unk_src=True, no_unk_tgt=True)

    # create_encdec_from_config_dict(config_training.model, src_indexer, tgt_indexer,
    #     load_config_model="if_exists" if config_training.training_management.resume else "no")

    # if config_training.training_management.resume:
    #     if "model_parameters" not in config_training:
    #         log.error("cannot find model parameters in config file")
    #     if config_training.model_parameters.type == "model":
    #         model_filename = config_training.model_parameters.filename
    #         log.info("resuming from model parameters %s" % model_filename)
    #         serializers.load_npz(model_filename, encdec)

    if config_training.training_management.load_model is not None:
        log.info("loading model parameters from %s",
                 config_training.training_management.load_model)
        load_model_flexible(config_training.training_management.load_model,
                            encdec)
        # try:
        #     serializers.load_npz(config_training.training_management.load_model, encdec)
        # except KeyError:
        #     log.info("not model format, trying snapshot format")
        #     with np.load(config_training.training_management.load_model) as fseri:
        #         dicseri = serializers.NpzDeserializer(fseri, path="updater/model:main/")
        #         dicseri.load(encdec)

    gpu = config_training.training_management.gpu
    if gpu is not None:
        encdec = encdec.to_gpu(gpu)

    if config_training.training.optimizer == "adadelta":
        optimizer = optimizers.AdaDelta()
    elif config_training.training.optimizer == "adam":
        optimizer = optimizers.Adam()
    elif config_training.training.optimizer == "scheduled_adam":
        from nmt_chainer.additional_links.scheduled_adam import ScheduledAdam
        optimizer = ScheduledAdam(d_model=config_training.model.ff_d_model)
    elif config_training.training.optimizer == "adagrad":
        optimizer = optimizers.AdaGrad(
            lr=config_training.training.learning_rate)
    elif config_training.training.optimizer == "sgd":
        optimizer = optimizers.SGD(lr=config_training.training.learning_rate)
    elif config_training.training.optimizer == "momentum":
        optimizer = optimizers.MomentumSGD(
            lr=config_training.training.learning_rate,
            momentum=config_training.training.momentum)
    elif config_training.training.optimizer == "nesterov":
        optimizer = optimizers.NesterovAG(
            lr=config_training.training.learning_rate,
            momentum=config_training.training.momentum)
    elif config_training.training.optimizer == "rmsprop":
        optimizer = optimizers.RMSprop(
            lr=config_training.training.learning_rate)
    elif config_training.training.optimizer == "rmspropgraves":
        optimizer = optimizers.RMSpropGraves(
            lr=config_training.training.learning_rate,
            momentum=config_training.training.momentum)
    else:
        raise NotImplementedError

    with cuda.get_device(gpu):
        optimizer.setup(encdec)

    if (config_training.training.l2_gradient_clipping is not None
            and config_training.training.l2_gradient_clipping > 0):
        optimizer.add_hook(chainer.optimizer.GradientClipping(
            config_training.training.l2_gradient_clipping))

    if (config_training.training.hard_gradient_clipping is not None
            and config_training.training.hard_gradient_clipping > 0):
        optimizer.add_hook(chainer.optimizer.GradientHardClipping(
            *config_training.training.hard_gradient_clipping))

    if config_training.training.weight_decay is not None:
        optimizer.add_hook(chainer.optimizer.WeightDecay(
            config_training.training.weight_decay))

    if config_training.training_management.load_optimizer_state is not None:
        with cuda.get_device(gpu):
            log.info("loading optimizer parameters from %s",
                     config_training.training_management.load_optimizer_state)
            serializers.load_npz(
                config_training.training_management.load_optimizer_state,
                optimizer)

    if config_training.training_management.timer_hook:
        timer_hook = profiling_tools.MyTimerHook
    else:
        import contextlib

        @contextlib.contextmanager
        def timer_hook():
            yield

    import training_chainer
    with cuda.get_device(gpu):
        with timer_hook() as timer_infos:
            if config_training.training_management.max_nb_iters is not None:
                stop_trigger = (
                    config_training.training_management.max_nb_iters,
                    "iteration")
                if config_training.training_management.max_nb_epochs is not None:
                    log.warn("max_nb_iters and max_nb_epochs both specified. "
                             "Only max_nb_iters will be considered.")
            elif config_training.training_management.max_nb_epochs is not None:
                stop_trigger = (
                    config_training.training_management.max_nb_epochs,
                    "epoch")
            else:
                stop_trigger = None

            training_chainer.train_on_data_chainer(
                encdec, optimizer, training_data, output_files_dict,
                src_indexer, tgt_indexer, eos_idx=eos_idx,
                config_training=config_training,
                stop_trigger=stop_trigger,
                test_data=test_data, dev_data=dev_data, valid_data=valid_data)