def main():
    """Training entry point: parse args, set up device/RNG state, train.

    Side effects: seeds ``numpy``/``xp``/``random``, mutates
    ``chainer.global_config``, writes progress to stderr, and finally
    calls ``train_model(args)``.
    """
    args = parse()
    if args.gpu >= 0:
        import cupy as xp
        cuda.check_cuda_available()
        cuda.get_device_from_id(args.gpu).use()
        sys.stderr.write('w/ using GPU [%d] \n' % args.gpu)
    else:
        import numpy as xp
        args.gpu = -1
        # BUG FIX: the original message had no trailing newline, unlike
        # every other stderr message in this function.
        sys.stderr.write('w/o using GPU\n')
    # Fix the random seeds for reproducibility.
    sys.stderr.write('# random seed [%d] \n' % args.seed)
    np.random.seed(args.seed)
    xp.random.seed(args.seed)
    random.seed(args.seed)
    chainer.global_config.train = True
    chainer.global_config.enable_backprop = True
    chainer.global_config.use_cudnn = "always"
    chainer.global_config.type_check = True
    sys.stderr.write('CHAINER CONFIG [{}] \n'.format(
        chainer.global_config.__dict__))
    if args.dropout_rate >= 1.0 or args.dropout_rate < 0.0:
        # BUG FIX: the original only *warned* that the rate would be
        # forcibly changed but never actually changed it. A dropout rate
        # must lie in [0.0, 1.0), so an invalid value now disables
        # dropout (the message also claimed "1.0", which would drop every
        # unit and is itself outside the valid range).
        sys.stderr.write(
            'Warning: dropout rate is invalid!\n'
            'Dropout rate is forcibly set 0.0\n')
        args.dropout_rate = 0.0
    train_model(args)
def to_gpu(
        self,
        device=None,  # type: tp.Optional[types.CudaDeviceSpec]
):
    # type: (...) -> 'DeviceResident'
    """Transfers parameter variables and persistent values to the GPU.

    Attributes that are not registered as parameters or persistent
    values are left untouched; a link that needs them on the GPU must
    override :meth:`Link.to_device` to move them itself.

    Args:
        device: Target device specifier. The current device is used
            when omitted.

    Returns:
        self
    """
    cuda.check_cuda_available()
    gpu_device = chainer.backends.cuda.GpuDevice(
        cuda._get_device_or_current(device))
    to_device_visitor = _ToDeviceVisitor(
        gpu_device,
        entry_method_info=('to_gpu', {'device': gpu_device.device}),
        skip_between_cupy_devices=True)
    self.__to_device(to_device_visitor)
    return self
def to_gpu(
        self,
        device=None,  # type: tp.Optional[types.CudaDeviceSpec]
):
    # type: (...) -> 'DeviceResident'
    """Sends parameter variables and persistent values to the GPU.

    Non-registered attributes are not handled; override
    :meth:`~DeviceResident.device_resident_accept` in the link
    implementation if such attributes must be transferred as well.

    .. warning::

        Parameters that already live on a GPU are not transferred.
        Use ``to_device`` to perform inter-GPU transfer.

    Args:
        device: Target device specifier. The current device is used
            when omitted.

    Returns:
        self
    """
    cuda.check_cuda_available()
    target = chainer.backends.cuda.GpuDevice(
        cuda._get_device_or_current(device))
    visitor = _ToDeviceVisitor(
        target,
        entry_method_info=('to_gpu', {'device': target.device}),
        skip_between_cupy_devices=True,
        starting_device_resident=self)
    self.__to_device(visitor)
    return self
def to_gpu(self, device=None):
    """Moves registered parameters and persistent values onto a GPU.

    Only registered parameters and persistent values are transferred;
    a link holding other array attributes must override this method to
    move them itself.

    Args:
        device: Target device specifier. The current device is used
            when omitted.

    Returns:
        self
    """
    cuda.check_cuda_available()
    if not self._cpu:
        # Already on a GPU; nothing to transfer.
        return self
    attrs = self.__dict__
    with cuda._get_device(device):
        for param_name in self._params:
            attrs[param_name].to_gpu()
        for key in self._persistent:
            current = attrs[key]
            # iDeep arrays are materialized as plain numpy arrays first.
            if isinstance(current, intel64.mdarray):
                current = numpy.array(current)
            if isinstance(current, numpy.ndarray):
                attrs[key] = cuda.to_gpu(current)
        self._device_id = cuda.cupy.cuda.get_device_id()
    self._cpu = False
    return self
def to_gpu(self, device=None):
    """Copies parameter variables and persistent values to a GPU.

    Non-registered attributes are not copied; override this method in
    the link implementation when such attributes must follow.

    Args:
        device: Target device specifier; defaults to the current
            device when omitted.

    Returns:
        self
    """
    cuda.check_cuda_available()
    # Only CPU-resident links need any work; otherwise fall through.
    if self._cpu:
        d = self.__dict__
        with cuda._get_device(device):
            for name in self._params:
                d[name].to_gpu()
            for name in self._persistent:
                val = d[name]
                if isinstance(val, intel64.mdarray):
                    # Convert iDeep arrays to numpy before the device copy.
                    val = numpy.array(val)
                if isinstance(val, numpy.ndarray):
                    d[name] = cuda.to_gpu(val)
            self._device_id = cuda.cupy.cuda.get_device_id()
        self._cpu = False
    return self
def __init__(self):
    """Initializes profiler bookkeeping and the CuPy memory hook.

    Raises:
        RuntimeError: If the CuPy memory-hook feature is unavailable
            (CuPy >= 2.0 is required).
    """
    cuda.check_cuda_available()
    if not memory_hook_available:
        raise RuntimeError(
            'CuPy >= 2.0 is required. %s' % str(_resolution_error))
    # Per-call records and the running stack of open frames.
    self.call_history = []
    self._running_stack = []
    # Cumulative byte counters, fed by the memory hook below.
    self._total_used_bytes = 0
    self._total_acquired_bytes = 0
    self._memory_hook = CupyMemoryCumulativeHook()
def __init__(self):
    """Sets up empty profiling state and the cumulative CuPy memory hook.

    Raises:
        RuntimeError: If CuPy's memory-hook support could not be
            imported (CuPy >= 2.0 is required).
    """
    cuda.check_cuda_available()
    if not memory_hook_available:
        detail = str(_resolution_error)
        raise RuntimeError('CuPy >= 2.0 is required. %s' % detail)
    self.call_history = []          # finished profiling records
    self._memory_hook = CupyMemoryCumulativeHook()
    self._running_stack = []        # frames currently being profiled
    self._total_used_bytes = 0      # cumulative bytes actually used
    self._total_acquired_bytes = 0  # cumulative bytes acquired from pool
def load_models(args: argparse.Namespace) -> dict[str, chainer.Chain]:
    """Load models using a args config.

    Args:
        args (argparse.Namespace): argparse namespace containing config such
            as the arch, color, method, noise_level, model_dir and gpu.

    Returns:
        dict[str, chainer.Chain]: Mapping of model names to chainer.Chain
        models, already moved to the GPU when ``args.gpu >= 0``.
    """
    ch = 3 if args.color == "rgb" else 1
    if args.model_dir is None:
        model_dir = THISDIR + f"/models/{args.arch.lower()}"
    else:
        model_dir = args.model_dir

    models: dict[str, chainer.Chain] = {}
    flag = False
    if args.method == "noise_scale":
        model_name = f"anime_style_noise{args.noise_level}_scale_{args.color}.npz"
        model_path = os.path.join(model_dir, model_name)
        if os.path.exists(model_path):
            models["noise_scale"] = srcnn.archs[args.arch](ch)
            load_npz(model_path, models["noise_scale"])
            alpha_model_name = f"anime_style_scale_{args.color}.npz"
            alpha_model_path = os.path.join(model_dir, alpha_model_name)
            models["alpha"] = srcnn.archs[args.arch](ch)
            load_npz(alpha_model_path, models["alpha"])
        else:
            # No combined model available: fall back to loading the
            # separate scale and noise models below.
            flag = True
    if args.method == "scale" or flag:
        model_name = f"anime_style_scale_{args.color}.npz"
        model_path = os.path.join(model_dir, model_name)
        models["scale"] = srcnn.archs[args.arch](ch)
        load_npz(model_path, models["scale"])
    if args.method == "noise" or flag:
        model_name = f"anime_style_noise{args.noise_level}_{args.color}.npz"
        model_path = os.path.join(model_dir, model_name)
        if not os.path.exists(model_path):
            # Some archs only ship a combined noise+scale model.
            model_name = f"anime_style_noise{args.noise_level}_scale_{args.color}.npz"
            model_path = os.path.join(model_dir, model_name)
        models["noise"] = srcnn.archs[args.arch](ch)
        load_npz(model_path, models["noise"])

    if args.gpu >= 0:
        cuda.check_cuda_available()
        cuda.get_device(args.gpu).use()
        # FIX (idiom): iterate the values directly instead of unpacking
        # .items() and discarding the key.
        for model in models.values():
            model.to_gpu()
    return models
def to_gpu(self, device=None):
    """Transfers parameter variables and persistent values to the GPU.

    Non-registered attributes are not handled; the link implementation
    must override :meth:`Link.to_device` if such attributes need to be
    copied as well.

    Args:
        device: Target device specifier. The current device is used
            when omitted.

    Returns:
        self
    """
    cuda.check_cuda_available()
    target = cuda._get_device_or_current(device)
    return self._to_device(target, skip_between_cupy_devices=True)
def to_gpu(self, device=None):
    """Moves parameter variables and persistent values onto the GPU.

    This method does not handle non-registered attributes; override
    :meth:`Link.to_device` in the link implementation to copy those.

    Args:
        device: Target device specifier; defaults to the current
            device when omitted.

    Returns:
        self
    """
    cuda.check_cuda_available()
    resolved_device = cuda._get_device_or_current(device)
    return self._to_device(
        resolved_device, skip_between_cupy_devices=True)
def main():
    """Evaluation entry point: parse args, select the array backend, test.

    Side effects: rebinds the module-global ``xp`` array backend,
    mutates ``chainer.global_config`` for inference (no backprop, no
    dropout), and calls ``ttest_model(args)``.
    """
    global xp
    args = parse()
    if args.gpu >= 0:
        import cupy
        xp = cupy
        cuda.check_cuda_available()
        cuda.get_device_from_id(args.gpu).use()
        sys.stderr.write('w/ using GPU [%d] \n' % args.gpu)
    else:
        # BUG FIX: the CPU branch declared ``global xp`` but never bound
        # it, so later ``xp.*`` calls could raise a NameError unless the
        # module happened to pre-assign it. Mirror the GPU branch by
        # binding numpy here.
        import numpy
        xp = numpy
        args.gpu = -1
        sys.stderr.write('w/o using GPU\n')
    # Inference-time configuration: no training mode, no backprop.
    chainer.global_config.train = False
    chainer.global_config.enable_backprop = False
    chainer.global_config.use_cudnn = "always"
    chainer.global_config.type_check = True
    # Dropout is disabled at test time.
    args.dropout_rate = .0
    # NOTE(review): 'ttest_model' looks like a typo for 'test_model' —
    # confirm against the rest of the module before renaming.
    ttest_model(args)
def __init__(self):
    """Verifies CUDA and nvtx are usable before the hook is installed.

    Raises:
        RuntimeError: If CuPy was built without nvtx support.
    """
    cuda.check_cuda_available()
    if cuda.cupy.cuda.nvtx_enabled:
        return
    raise RuntimeError('nvtx is required for CUDAProfileHook')
help='learning minibatch size') parser.add_argument('--label', '-l', type=int, default=5, help='number of labels') parser.add_argument('--epocheval', '-p', type=int, default=5, help='number of epochs per evaluation') parser.add_argument('--test', dest='test', action='store_true') parser.set_defaults(test=False) args = parser.parse_args() if args.gpu >= 0: cuda.check_cuda_available() xp = cuda.cupy if args.gpu >= 0 else np n_epoch = args.epoch # number of epochs n_units = args.unit # number of units per layer batchsize = args.batchsize # minibatch size n_label = args.label # number of labels epoch_per_eval = args.epocheval # number of epochs per evaluation class SexpParser(object): def __init__(self, line): self.tokens = re.findall(r'\(|\)|[^\(\) ]+', line) self.pos = 0 def parse(self):
def main():
    """Train a word2vec model (skip-gram or CBOW) and save the embeddings.

    Side effects: loads the corpus via ``Initialization``, writes trainer
    logs under ``--out``, and dumps the final vectors to
    ``word2vec.model`` in the classic text format.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', default=-1, type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--unit', '-u', default=100, type=int,
                        help='number of units')
    parser.add_argument('--window', '-w', default=5, type=int,
                        help='window size')
    parser.add_argument('--batchsize', '-b', type=int, default=1000,
                        help='learning minibatch size')
    parser.add_argument('--epoch', '-e', default=20, type=int,
                        help='number of epochs to learn')
    parser.add_argument('--model', '-m', choices=['skipgram', 'cbow'],
                        default='skipgram',
                        help='model type ("skipgram", "cbow")')
    parser.add_argument('--negative-size', default=5, type=int,
                        help='number of negative samples')
    parser.add_argument('--out-type', '-o', choices=['hsm', 'ns', 'original'],
                        default='hsm',
                        help='output model type ("hsm": hierarchical softmax, '
                        '"ns": negative sampling, "original": '
                        'no approximation)')
    parser.add_argument('--out', default='result',
                        help='Directory to output the result')
    parser.add_argument('--test', dest='test', action='store_true')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    if args.gpu >= 0:
        # BUG FIX: check CUDA availability *before* selecting the device.
        # The original called use() first, which fails with a low-level
        # cupy error instead of the intended clean message when CUDA is
        # absent.
        cuda.check_cuda_available()
        chainer.backends.cuda.get_device_from_id(args.gpu).use()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('Window: {}'.format(args.window))
    print('Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('Training model: {}'.format(args.model))
    print('Output type: {}'.format(args.out_type))
    print('')

    # Initialize and load the dataset.
    init = Initialization()
    train, val, _ = init.get_words()
    counts = collections.Counter(train)
    counts.update(collections.Counter(val))
    n_vocab = max(train) + 1

    if args.test:
        # Tiny subset for a smoke-test run.
        train = train[:100]
        val = val[:100]

    vocab = init.get_vocabulary()
    index2word = {wid: word for word, wid in six.iteritems(vocab)}

    print('n_vocab: %d' % n_vocab)
    print('data length: %d' % len(train))

    # Choose the output (loss) layer.
    if args.out_type == 'hsm':
        HSM = L.BinaryHierarchicalSoftmax
        tree = HSM.create_huffman_tree(counts)
        loss_func = HSM(args.unit, tree)
        loss_func.W.array[...] = 0
    elif args.out_type == 'ns':
        cs = [counts[w] for w in range(len(counts))]
        loss_func = L.NegativeSampling(args.unit, cs, args.negative_size)
        loss_func.W.array[...] = 0
    elif args.out_type == 'original':
        loss_func = SoftmaxCrossEntropyLoss(args.unit, n_vocab)
    else:
        raise Exception('Unknown output type: {}'.format(args.out_type))

    # Choose the model.
    if args.model == 'skipgram':
        model = SkipGram(n_vocab, args.unit, loss_func)
    elif args.model == 'cbow':
        model = ContinuousBoW(n_vocab, args.unit, loss_func)
    else:
        raise Exception('Unknown model type: {}'.format(args.model))

    if args.gpu >= 0:
        model.to_gpu()

    # Set up an optimizer.
    optimizer = O.Adam()
    optimizer.setup(model)

    # Set up iterators over sliding context windows.
    train_iter = WindowIterator(train, args.window, args.batchsize)
    val_iter = WindowIterator(val, args.window, args.batchsize, repeat=False)

    # Set up an updater and trainer.
    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                converter=convert,
                                                device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(
        extensions.Evaluator(val_iter, model, converter=convert,
                             device=args.gpu))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport(['epoch', 'main/loss',
                                'validation/main/loss']))
    trainer.extend(extensions.ProgressBar())
    trainer.run()

    # Save the word2vec model in the classic "word v1 v2 ..." format.
    with open('word2vec.model', 'w') as f:
        f.write('%d %d\n' % (len(index2word), args.unit))
        w = cuda.to_cpu(model.embed.W.array)
        for i, wi in enumerate(w):
            v = ' '.join(map(str, wi))
            f.write('%s %s\n' % (index2word[i], v))
parser.add_argument('dataset', help='Path to validation image-label list file') parser.add_argument('model_type', choices=('alexnet', 'caffenet', 'googlenet', 'resnet'), help='Model type (alexnet, caffenet, googlenet, resnet)') parser.add_argument('model', help='Path to the pretrained Caffe model') parser.add_argument('--basepath', '-b', default='/', help='Base path for images in the dataset') parser.add_argument('--mean', '-m', default='ilsvrc_2012_mean.npy', help='Path to the mean file') parser.add_argument('--batchsize', '-B', type=int, default=100, help='Minibatch size') parser.add_argument('--gpu', '-g', type=int, default=-1, help='Zero-origin GPU ID (nevative value indicates CPU)') args = parser.parse_args() if args.gpu >= 0: cuda.check_cuda_available() xp = cuda.cupy if args.gpu >= 0 else np assert args.batchsize > 0 chainer.config.train = False # All the codes will run in test mode dataset = [] with open(args.dataset) as list_file: for line in list_file: pair = line.strip().split() path = os.path.join(args.basepath, pair[0]) dataset.append((path, np.int32(pair[1]))) assert len(dataset) % args.batchsize == 0
def main():
    """Train a word2vec model (skip-gram or CBOW) on PTB and save vectors.

    Side effects: downloads/loads the Penn Treebank dataset, writes
    trainer logs under ``--out``, and dumps the embeddings to
    ``word2vec.model``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', default=-1, type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--unit', '-u', default=100, type=int,
                        help='number of units')
    parser.add_argument('--window', '-w', default=5, type=int,
                        help='window size')
    parser.add_argument('--batchsize', '-b', type=int, default=1000,
                        help='learning minibatch size')
    parser.add_argument('--epoch', '-e', default=20, type=int,
                        help='number of epochs to learn')
    parser.add_argument('--model', '-m', choices=['skipgram', 'cbow'],
                        default='skipgram',
                        help='model type ("skipgram", "cbow")')
    parser.add_argument('--negative-size', default=5, type=int,
                        help='number of negative samples')
    parser.add_argument('--out-type', '-o', choices=['hsm', 'ns', 'original'],
                        default='hsm',
                        help='output model type ("hsm": hierarchical softmax, '
                        '"ns": negative sampling, "original": '
                        'no approximation)')
    parser.add_argument('--out', default='result',
                        help='Directory to output the result')
    parser.add_argument('--test', dest='test', action='store_true')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    if args.gpu >= 0:
        # BUG FIX: check CUDA availability before selecting the device
        # (the original called use() first, producing a less helpful
        # error when CUDA is missing). The original also selected the
        # same device a second time further down; that redundant
        # duplicate block has been removed.
        cuda.check_cuda_available()
        chainer.backends.cuda.get_device_from_id(args.gpu).use()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('Window: {}'.format(args.window))
    print('Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('Training model: {}'.format(args.model))
    print('Output type: {}'.format(args.out_type))
    print('')

    # Load the dataset
    train, val, _ = chainer.datasets.get_ptb_words()
    counts = collections.Counter(train)
    counts.update(collections.Counter(val))
    n_vocab = max(train) + 1

    if args.test:
        # Tiny subset for a smoke-test run.
        train = train[:100]
        val = val[:100]

    vocab = chainer.datasets.get_ptb_words_vocabulary()
    index2word = {wid: word for word, wid in six.iteritems(vocab)}

    print('n_vocab: %d' % n_vocab)
    print('data length: %d' % len(train))

    # Choose the output (loss) layer.
    if args.out_type == 'hsm':
        HSM = L.BinaryHierarchicalSoftmax
        tree = HSM.create_huffman_tree(counts)
        loss_func = HSM(args.unit, tree)
        loss_func.W.data[...] = 0
    elif args.out_type == 'ns':
        cs = [counts[w] for w in range(len(counts))]
        loss_func = L.NegativeSampling(args.unit, cs, args.negative_size)
        loss_func.W.data[...] = 0
    elif args.out_type == 'original':
        loss_func = SoftmaxCrossEntropyLoss(args.unit, n_vocab)
    else:
        raise Exception('Unknown output type: {}'.format(args.out_type))

    # Choose the model
    if args.model == 'skipgram':
        model = SkipGram(n_vocab, args.unit, loss_func)
    elif args.model == 'cbow':
        model = ContinuousBoW(n_vocab, args.unit, loss_func)
    else:
        raise Exception('Unknown model type: {}'.format(args.model))

    if args.gpu >= 0:
        model.to_gpu()

    # Set up an optimizer
    optimizer = O.Adam()
    optimizer.setup(model)

    # Set up an iterator
    train_iter = WindowIterator(train, args.window, args.batchsize)
    val_iter = WindowIterator(val, args.window, args.batchsize, repeat=False)

    # Set up an updater
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, converter=convert, device=args.gpu)

    # Set up a trainer
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(extensions.Evaluator(
        val_iter, model, converter=convert, device=args.gpu))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss']))
    trainer.extend(extensions.ProgressBar())
    trainer.run()

    # Save the word2vec model
    with open('word2vec.model', 'w') as f:
        f.write('%d %d\n' % (len(index2word), args.unit))
        w = cuda.to_cpu(model.embed.W.data)
        for i, wi in enumerate(w):
            v = ' '.join(map(str, wi))
            f.write('%s %s\n' % (index2word[i], v))
def main(params):
    """Train an MLP text classifier once; return a validation-loss score.

    Serves as a hyperopt objective: ``params`` supplies the tuned values
    ('f_layer_num', 's_layer_num', 'cut_length'). The CLI is still parsed
    for data paths and epochs, but the layer sizes and sequence cut
    length come from ``params``.

    Returns:
        float: Mean of the (up to) 10 best validation losses read back
        from the trainer's ``result/log`` file.
    """
    parser = ArgumentParser()
    parser.add_argument('--gpu', '-g', default=-1, type=int, help='GPU ID')
    parser.add_argument('--embedding_file', '-e', metavar="FILE",
                        default=None)
    parser.add_argument('--training_file', metavar="FILE", default=None)
    parser.add_argument('--test_file', metavar="FILE", default=None)
    parser.add_argument('--voc_limit', metavar="INT", type=int,
                        default=100000)
    parser.add_argument('--num_epoch', metavar="INT", type=int, default=10)
    parser.add_argument('--first_layer', '-f', metavar="INT", type=int,
                        default=100)
    parser.add_argument('--second_layer', '-s', metavar="INT", type=int,
                        default=20)
    parser.add_argument('--cut_max_length', '-c', metavar="INT", type=int,
                        default=None)

    # Hyperopt-tuned values take precedence over the CLI equivalents.
    f_layer = int(params['f_layer_num'])
    s_layer = int(params['s_layer_num'])
    cut_length = int(params['cut_length'])
    print("f_layer={0}".format(f_layer))
    print("s_layer={0}".format(s_layer))

    args = parser.parse_args()
    if args.embedding_file is None:
        args.embedding_file = "."
    print(args.gpu)
    cuda.check_cuda_available()
    if args.embedding_file is not None:
        print(args.embedding_file)
    if args.training_file is not None:
        print(args.training_file)
    if args.test_file is not None:
        print(args.test_file)
    print(args.voc_limit)
    print(args.num_epoch)

    train_data, train_label = Utils.load_data(args.training_file)
    test_data, test_label = Utils.load_data(args.test_file)

    # Feature extraction: embed token-id sequences, zero-padded to the
    # longer of the two split maxima, then flattened per example.
    embedding_loader = Embedding_Loader(
        embedding_file_path=args.embedding_file)
    embedding_l = embedding_loader.load_embedding(voc_limit=30000)
    train_max_length, train_data = embedding_loader.seq_to_ids(train_data)
    test_max_length, test_data = embedding_loader.seq_to_ids(test_data)
    max_length = max(train_max_length, test_max_length)
    train_data = Utils.zero_padding(train_data, max_length)
    test_data = Utils.zero_padding(test_data, max_length)
    # cut_length == 0 means "do not truncate".
    if cut_length != 0:
        train_data = Utils.cut_seq(train_data, cut_length)
        test_data = Utils.cut_seq(test_data, cut_length)
    train_data = embedding_l(train_data)
    test_data = embedding_l(test_data)
    train_data = F.reshape(
        train_data, [-1, train_data.shape[2] * train_data.shape[1]])
    train_data = tuple_dataset.TupleDataset(train_data.array, train_label)
    test_data = F.reshape(
        test_data, [-1, test_data.shape[2] * test_data.shape[1]])
    test_data = tuple_dataset.TupleDataset(test_data.array, test_label)

    batch_size = 32
    train_iter = chainer.iterators.SerialIterator(train_data, batch_size)
    test_iter = chainer.iterators.SerialIterator(
        test_data, batch_size, repeat=False, shuffle=False)

    # 4-way classifier on top of the tuned two-layer MLP.
    model = L.Classifier(MLP(f_layer, s_layer, 4))
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=args.gpu)
    trainer = training.Trainer(updater, (args.num_epoch, 'epoch'),
                               out="result")
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy'
        ]))

    print("start running...")
    trainer.run()
    print("finished running...")

    # Hyperopt score: read the LogReport output back and average the
    # 10 best validation losses (all of them when fewer were logged).
    with open('result/log', 'r') as f:
        tmp_result_list = ast.literal_eval(f.read())
    loss_data = [entry['validation/main/loss'] for entry in tmp_result_list]
    # BUG FIX (simplification): the original duplicated the averaging
    # logic across a >9 / <=9 branch; slicing to the first ten and
    # dividing by the slice length is exactly equivalent in both cases.
    best_loss = sorted(loss_data)[:10]
    return sum(best_loss) / len(best_loss)
def train(args):
    """Train a context-aware model and checkpoint the best weights.

    Args:
        args: Parsed CLI namespace (data dirs, GPU id, optimizer choice
            and hyper-parameters, early-stopping settings).

    Raises:
        ValueError: If ``args.optimizer`` is neither 'rmsprop' nor 'adam'.
    """
    if not os.path.exists(args.out):
        os.makedirs(args.out)
    if args.gpu >= 0:
        cuda.check_cuda_available()
        cuda.get_device(args.gpu).use()
    if args.random_seed:
        set_random_seed(args.random_seed, (args.gpu,))

    # Vocabulary / dictionary setup from the preprocessed input dir.
    user2index = load_dict(os.path.join(args.indir, USER_DICT_FILENAME))
    item2index = load_dict(os.path.join(args.indir, ITEM_DICT_FILENAME))
    (trimmed_word2count, word2index, aspect2index,
     opinion2index) = read_and_trim_vocab(args.indir, args.trimfreq)
    aspect_opinions = get_aspect_opinions(
        os.path.join(args.indir, TRAIN_FILENAME))

    # Persist the full preprocessing configuration next to the model.
    export_params(
        args,
        user2index,
        item2index,
        trimmed_word2count,
        word2index,
        aspect2index,
        opinion2index,
        aspect_opinions,
    )

    src_aspect_score = SOURCE_ASPECT_SCORE.get(
        args.context, "aspect_score_efm")
    data_loader = DataLoader(
        args.indir,
        user2index,
        item2index,
        trimmed_word2count,
        word2index,
        aspect2index,
        opinion2index,
        aspect_opinions,
        src_aspect_score,
    )
    train_iter, val_iter = get_dataset_iterator(
        args.context, data_loader, args.batchsize
    )
    model = get_context_model(args, data_loader)

    if args.optimizer == "rmsprop":
        optimizer = O.RMSprop(lr=args.learning_rate, alpha=args.alpha)
    elif args.optimizer == "adam":
        optimizer = O.Adam(amsgrad=args.amsgrad)
    else:
        # BUG FIX: the original chain had no else branch, so an unknown
        # optimizer name left ``optimizer`` unbound and crashed later
        # with a confusing NameError. Fail fast instead.
        raise ValueError("unknown optimizer: {}".format(args.optimizer))
    optimizer.setup(model)
    if args.grad_clip:
        optimizer.add_hook(GradientClipping(args.grad_clip))
    if args.gpu >= 0:
        model.to_gpu(args.gpu)

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, converter=convert, device=args.gpu
    )
    # Stop when validation loss plateaus, capped at args.epoch epochs.
    early_stop = triggers.EarlyStoppingTrigger(
        monitor="validation/main/loss",
        patients=args.patients,
        max_trigger=(args.epoch, "epoch"),
    )
    trainer = training.Trainer(updater, stop_trigger=early_stop,
                               out=args.out)
    trainer.extend(
        extensions.Evaluator(val_iter, model, converter=convert,
                             device=args.gpu)
    )
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport(
            ["epoch", "main/loss", "validation/main/loss", "lr",
             "elapsed_time"]
        )
    )
    trainer.extend(
        extensions.PlotReport(
            ["main/loss", "validation/main/loss"],
            x_key="epoch", file_name="loss.png"
        )
    )
    trainer.extend(extensions.ProgressBar())
    # Keep only the snapshot that minimizes validation loss.
    trainer.extend(
        extensions.snapshot_object(model, MODEL_FILENAME),
        trigger=triggers.MinValueTrigger("validation/main/loss"),
    )
    trainer.extend(extensions.observe_lr())

    if args.optimizer in ["rmsprop"]:
        if args.schedule_lr:
            # Step-wise exponential LR decay, floored at
            # args.min_learning_rate, applied every args.stepsize epochs
            # starting after args.begin_step.
            epoch_list = np.array(
                [i for i in range(1, int(args.epoch / args.stepsize) + 1)]
            ).astype(np.int32)
            value_list = args.learning_rate * args.lr_reduce ** epoch_list
            value_list[value_list < args.min_learning_rate] = \
                args.min_learning_rate
            epoch_list *= args.stepsize
            epoch_list += args.begin_step
            trainer.extend(
                schedule_optimizer_value(epoch_list.tolist(),
                                         value_list.tolist())
            )

    trainer.run()