def train(idx):
    print('train by fold: {}'.format(idx))

    df = pd.read_feather('./data/features.normalized.feather')
    fold = pd.read_feather('./data/fold.{}.feather'.format(idx))
    df['valid'] = df['SK_ID_CURR'].isin(fold['SK_ID_CURR'])

    model = TemporalEnsembling(df)
    train_dset = create_dataset(df, 'TARGET', 'valid')
    train_iter = SerialIterator(train_dset, 512)
    optimizer = Adam()
    optimizer.setup(model)
    updater = StandardUpdater(train_iter, optimizer, device=0)
    trainer = Trainer(updater, (20, 'epoch'), out='tempem_result')
    trainer.extend(
        make_extension((1, 'epoch'))(UpdateExtention(
            train_dset, './data/nn.fold.{}.feather'.format(idx))))
    trainer.extend(extensions.LogReport())
    filename = 'fold_%d_snapshot_epoch-{.updater.epoch}' % (idx)
    trainer.extend(extensions.snapshot(filename=filename))
    trainer.extend(
        extensions.PrintReport(
            ['epoch', 'main/loss', 'main/val/auc', 'elapsed_time']))
    trainer.run()
class Train:
    def __init__(self):
        with open("data.pickle", "rb") as f:
            self.data = pickle.load(f)
        self.model = Model()
        self.model.to_gpu()
        self.optimizer = Adam()
        self.optimizer.setup(self.model)
        self.executor = ThreadPoolExecutor(8)
        # Prefetch the first pair of batches asynchronously so that load()
        # always has a Future to wait on.
        self.hoge = self.executor.submit(self.data.next, 2, 2)

    def load(self):
        # Wait for the prefetched batches, then immediately start
        # prefetching the next ones in the background.
        d = self.hoge.result()
        self.hoge = self.executor.submit(self.data.next, 2, 2)
        return d

    def training(self):
        # Effectively an infinite training loop; report the loss every 100 steps.
        for i in range(1000000000000000):
            a = self.batch()
            if i % 100 == 0:
                print(f"{i} loss:{a}")

    def batch(self):
        a, b = self.load()
        self.model.cleargrads()
        y = tuple(self.executor.map(self.model, a + b))
        loss = F.contrastive(y[0], y[1], [1]) + \
            F.contrastive(y[2], y[3], [1]) + \
            F.contrastive(y[0], y[2], [0]) + \
            F.contrastive(y[1], y[3], [0])
        loss.backward()
        self.optimizer.update()
        return loss.data.get()
def main():
    # data
    train, test = get_mnist(withlabel=False)
    n_x = train.shape[1]

    # model
    model = create_sample_model(n_x)

    n_batch = 256
    train_iter = SerialIterator(train, n_batch)
    # TODO: report test loss
    # test_iter = SerialIterator(test, n_batch)

    optimizer = Adam()
    optimizer.setup(model)

    gpu = 0
    updater = StandardUpdater(train_iter, optimizer, device=gpu)

    n_epoch = 50
    trainer = Trainer(updater, (n_epoch, 'epoch'))
    trainer.extend(
        snapshot_object(
            model, filename='snapshot_epoch_{.updater.epoch:03d}.npz'),
        trigger=(1, 'epoch'))
    trainer.extend(LogReport())
    trainer.extend(PrintReport([
        'epoch',
        'main/loss',
        'main/iaf_loss',
        'main/rec_loss',
    ]))
    trainer.run()
def main():
    train, test = chainer.datasets.get_mnist()

    def forward(x, t, model):
        y, l = model(x)
        if model.c:
            y, l = Lmt(t)(y, l)
        t = np.eye(10)[t].astype(np.float32)
        loss = mse(y, t)
        return loss

    model = MLP(c=0.05)
    optimizer = Opt()
    optimizer.setup(model)

    for epoch in range(5):
        for batch in SerialIterator(train, 60, repeat=False):
            x, t = format(batch)
            optimizer.update(forward, x, t, model)
        tx, tt = format(test)
        print("epoch {}: accuracy: {:.3f}".format(
            epoch + 1, model.accuracy(tx, tt)))

    fgsm = FGSM(model)
    for eta in [0.01, 0.02, 0.05, 0.1]:
        cnt = 0
        fail = 0
        for i in np.random.randint(0, 10000, 100):
            res = fgsm.attack(test[i][0], test[i][1], eta=eta)
            if res != -1:
                cnt += 1
                if not res:
                    fail += 1
        print("c: {:.3f}, eta: {:.3f}, attacked: {:.3f}".format(
            model.c, eta, fail / cnt))
def main():
    train_x, train_y, val_x, val_y = load_pascal_voc_dataset(DATASET_ROOT)
    train_dataset = YoloDataset(train_x, train_y,
                                target_size=model_class.img_size,
                                n_grid=model_class.n_grid, augment=True)
    test_dataset = YoloDataset(val_x, val_y,
                               target_size=model_class.img_size,
                               n_grid=model_class.n_grid, augment=False)

    class_weights = [1.0 for i in range(train_dataset.n_classes)]
    class_weights[0] = 0.2
    model = model_class(n_classes=train_dataset.n_classes, n_base_units=6,
                        class_weights=class_weights)
    if os.path.exists(RESULT_DIR + '/model_last.npz'):
        print('continue from previous result')
        chainer.serializers.load_npz(RESULT_DIR + '/model_last.npz', model)
    optimizer = Adam()
    optimizer.setup(model)

    train_iter = SerialIterator(train_dataset, batch_size=BATCH_SIZE)
    test_iter = SerialIterator(test_dataset, batch_size=BATCH_SIZE,
                               shuffle=False, repeat=False)
    updater = StandardUpdater(train_iter, optimizer, device=DEVICE)
    trainer = Trainer(updater, (N_EPOCHS, 'epoch'), out=RESULT_DIR)

    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(extensions.Evaluator(test_iter, model, device=DEVICE))
    trainer.extend(
        extensions.PrintReport([
            'main/loss', 'validation/main/loss',
            'main/cl_loss', 'validation/main/cl_loss',
            'main/cl_acc', 'validation/main/cl_acc',
            'main/pos_loss', 'validation/main/pos_loss',
        ]))
    trainer.extend(extensions.snapshot_object(model, 'best_loss.npz'),
                   trigger=triggers.MinValueTrigger('validation/main/loss'))
    trainer.extend(extensions.snapshot_object(model, 'best_classification.npz'),
                   trigger=triggers.MaxValueTrigger('validation/main/cl_acc'))
    trainer.extend(
        extensions.snapshot_object(model, 'best_position.npz'),
        trigger=triggers.MinValueTrigger('validation/main/pos_loss'))
    trainer.extend(extensions.snapshot_object(model, 'model_last.npz'),
                   trigger=(1, 'epoch'))

    trainer.run()
def main():
    args = parse_args()

    # config
    dataset = Cifar10Dataset(split='train')
    train_iter = chainer.iterators.SerialIterator(dataset, args.batch_size)

    gen = Generator()
    dis = Discriminator(args.n_labels)

    opts = {
        'opt_gen': Adam(args.alpha, args.beta1, args.beta2).setup(gen),
        'opt_dis': Adam(args.alpha, args.beta1, args.beta2).setup(dis)
    }

    updater_args = {
        'iterator': {'main': train_iter},
        'device': args.device,
        'models': [gen, dis],
        'optimizer': opts
    }

    if 0 <= args.device:
        chainer.backends.cuda.get_device_from_id(args.device).use()
        gen.to_gpu()
        dis.to_gpu()

    noise_gen = NoiseGenerator(gen.xp, n_labels=args.n_labels)
    updater = ACGANUpdater(noise_gen, **updater_args)
    trainer = chainer.training.Trainer(updater, (args.max_iter, 'iteration'),
                                       out=args.out)

    # setup logging
    snapshot_interval = (args.max_iter, 'iteration')
    sample_interval = (1000, 'iteration')
    display_interval = (10, 'iteration')
    trainer.extend(
        extensions.snapshot_object(gen, 'gen_{.updater.iteration}.npz'),
        trigger=snapshot_interval)
    trainer.extend(
        extensions.snapshot_object(dis, 'dis_{.updater.iteration}.npz'),
        trigger=snapshot_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    log_keys = ["iteration", "loss_dis", "loss_gen"]
    trainer.extend(extensions.LogReport(log_keys, trigger=display_interval))
    trainer.extend(extensions.PrintReport(log_keys), trigger=display_interval)
    trainer.extend(sample_generate(gen, noise_gen), trigger=sample_interval)
    trainer.extend(
        extensions.PlotReport(['loss_gen', 'loss_dis'], 'iteration',
                              file_name='loss.png'))

    trainer.run()
def chainer_model_pipe(self, nn, train, valid, params):
    epoch = params['epoch']
    batch_size = params['batch_size']
    use_gpu = params['use_gpu']
    if 'fixed_base_w' in params.keys():
        fixed_base_w = params['fixed_base_w']
    else:
        fixed_base_w = False

    # Model instance
    model = L.Classifier(nn)

    if use_gpu:
        device = 0
        model.to_gpu(device)
    else:
        device = -1

    # Create mini-batch iterators
    train_iter = SerialIterator(train, batch_size)
    valid_iter = SerialIterator(valid, batch_size, repeat=False, shuffle=False)

    # Set up learning
    optimizer = Adam()
    optimizer.setup(model)

    if fixed_base_w:
        model.predictor.base.disable_update()

    updater = StandardUpdater(train_iter, optimizer, device=device)
    trainer = Trainer(updater, (epoch, 'epoch'), out='result/cat_dog')
    trainer.extend(Evaluator(valid_iter, model, device=device))
    trainer.extend(LogReport(trigger=(1, 'epoch')))
    trainer.extend(PrintReport([
        'epoch', 'main/accuracy', 'validation/main/accuracy',
        'main/loss', 'validation/main/loss', 'elapsed_time'
    ]), trigger=(1, 'epoch'))

    trainer.run()

    if use_gpu:
        model.to_cpu()

    return model
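# Usage sketch (illustrative, not from the original source): the params keys
# below mirror exactly what chainer_model_pipe() reads; `Pipeline`, `MyNet`,
# `train_data` and `valid_data` are hypothetical placeholders.
params = {
    'epoch': 10,
    'batch_size': 32,
    'use_gpu': False,
    'fixed_base_w': True,  # freeze the pretrained base of the predictor
}
trained_model = Pipeline().chainer_model_pipe(MyNet(), train_data,
                                              valid_data, params)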
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", default=None)
    parser.add_argument("--gpu", type=int, default=0)
    parser.add_argument("--batch_size", type=int, default=4)
    parser.add_argument("--data_dir", type=str, default="./datasets")
    parser.add_argument("--data_list", type=str, default="train.txt")
    parser.add_argument("--n_class", type=int, default=5)
    parser.add_argument("--n_steps", type=int, default=100)
    parser.add_argument("--snapshot_dir", type=str, default="./snapshots")
    parser.add_argument("--save_steps", type=int, default=50)
    args = parser.parse_args()
    print(args)

    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)

    model = RefineResNet(n_class=args.n_class)
    if args.model is not None:
        serializers.load_npz(args.model, model)

    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()
        xp = cuda.cupy
    else:
        xp = np

    optimizer = Adam()
    # optimizer = MomentumSGD()
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(1e-5), "hook_wd")

    train_dataset = ImageDataset(args.data_dir, args.data_list,
                                 crop_size=(320, 320))
    train_iterator = MultiprocessIterator(train_dataset,
                                          batch_size=args.batch_size,
                                          repeat=True, shuffle=True)

    step = 0
    for zipped_batch in train_iterator:
        step += 1
        x = Variable(xp.array([zipped[0] for zipped in zipped_batch]))
        y = Variable(
            xp.array([zipped[1] for zipped in zipped_batch], dtype=xp.int32))

        # Keep the prediction as a Variable so the computational graph is
        # preserved and gradients can reach the model parameters (the
        # original converted it to a raw array, which detached the graph).
        pred = model(x)
        loss = F.softmax_cross_entropy(pred, y)
        optimizer.update(F.softmax_cross_entropy, pred, y)
        print("Step: {}, Loss: {}".format(step, loss.data))

        if step % args.save_steps == 0:
            serializers.save_npz(
                os.path.join(args.snapshot_dir, "model_{}.npz".format(step)),
                model)
        if step >= args.n_steps:
            break
def initialize(which, model_name, bg, face):
    my_model = LastLayers()
    load_model = True
    if load_model:
        p = os.path.join(P.MODELS, model_name)
        chainer.serializers.load_npz(p, my_model)
        print('%s loaded' % model_name)

    bg_model = Deepimpression()
    p = os.path.join(P.MODELS, bg)
    chainer.serializers.load_npz(p, bg_model)
    print('bg model %s loaded' % bg)

    face_model = Deepimpression()
    p = os.path.join(P.MODELS, face)
    chainer.serializers.load_npz(p, face_model)
    print('face model %s loaded' % face)

    my_optimizer = Adam(alpha=0.0002, beta1=0.5, beta2=0.999, eps=10e-8)
    my_optimizer.setup(my_model)

    if C.ON_GPU:
        my_model = my_model.to_gpu(device=C.DEVICE)
        bg_model = bg_model.to_gpu(device=C.DEVICE)
        face_model = face_model.to_gpu(device=C.DEVICE)

    print('Initializing')
    print('model initialized with %d parameters' % my_model.count_params())

    epochs = 1

    if which == 'val':
        labels = h5.File(P.CHALEARN_VAL_LABELS_20, 'r')
        steps = len(labels) // C.VAL_BATCH_SIZE
    elif which == 'test':
        labels = h5.File(P.CHALEARN_TEST_LABELS_20, 'r')
        steps = len(labels) // C.TEST_BATCH_SIZE
    else:
        print('which is not correct')
        labels = None
        steps = None

    loss = []
    pred_diff = np.zeros((1, 1), float)

    id_frames = h5.File(P.NUM_FRAMES, 'r')

    return (my_model, bg_model, face_model, my_optimizer, epochs, labels,
            steps, loss, pred_diff, id_frames)
def __init__(self, model, target, device_id=-1,
             learning_rate=0.00025, momentum=.9,
             minibatch_size=32, update_interval=10000):

    assert isinstance(model, ChainerModel), \
        'model should inherit from ChainerModel'

    super(QNeuralNetwork, self).__init__(model.input_shape,
                                         model.output_shape)

    self._gpu_device = None
    self._loss_val = 0

    # Target model update method
    self._steps = 0
    self._target_update_interval = update_interval

    # Setup model and target network
    self._minibatch_size = minibatch_size
    self._model = model
    self._target = target
    self._target.copyparams(self._model)

    # If GPU move to GPU memory
    if device_id >= 0:
        with cuda.get_device(device_id) as device:
            self._gpu_device = device
            self._model.to_gpu(device)
            self._target.to_gpu(device)

    # Setup optimizer
    self._optimizer = Adam(learning_rate, momentum, 0.999)
    self._optimizer.setup(self._model)
def main():
    # input_size: 299
    #model = InceptionV4(dim_out=17)
    #model = InceptionV4(dim_out=17, base_filter_num=6, ablocks=2, bblocks=1, cblocks=1)
    #model = InceptionResNetV2(dim_out=17)
    #model = InceptionResNetV2(dim_out=17, base_filter_num=8, ablocks=1, bblocks=2, cblocks=1)

    # input_size: 224
    #model = VGGNetBN(17)       # VGGNet original size
    #model = VGGNetBN(17, 16)   # VGGNet 1/4 of filter num
    #model = GoogLeNetBN(17)    # GoogLeNet original size
    #model = GoogLeNetBN(17, 16)  # GoogleNet 1/2 filter num
    #model = GoogLeNetBN(17, 8)   # GoogleNet 1/4 filter num
    #model = ResNet50(17)       # ResNet50 original size
    #model = ResNet50(17, 32)   # ResNet50 1/2 size
    #model = ResNet50(17, 16)   # ResNet50 1/4 size
    #model = SqueezeNet(17)     # SqueezeNet original size
    #model = SqueezeNet(17, 8)  # SqueezeNet 1/2 filter num
    #model = MobileNet(17)      # MobileNet original size
    #model = MobileNet(17, 16)  # MobileNet 1/2 filter num
    #model = MobileNet(17, 8)   # MobileNet 1/4 filter num

    # input_size: 100
    #model = FaceClassifier100x100V2(n_classes=17)
    model = FaceClassifier100x100V(n_classes=17)

    optimizer = Adam()
    optimizer.setup(model)

    train_dataset = load_dataset('train.tsv', True)
    test_dataset = load_dataset('test.tsv')
    train_iter = SerialIterator(train_dataset, batch_size=BATCH_SIZE)
    test_iter = SerialIterator(test_dataset, batch_size=BATCH_SIZE,
                               shuffle=False, repeat=False)
    updater = StandardUpdater(train_iter, optimizer, device=DEVICE)
    trainer = Trainer(updater, (N_EPOCHS, 'epoch'), out='result')

    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(extensions.Evaluator(test_iter, model, device=DEVICE))
    trainer.extend(extensions.PrintReport(
        ['main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy']))
    #trainer.extend(extensions.snapshot_object(model, 'snapshot_{.updater.epoch}.model'))

    trainer.run()

    chainer.serializers.save_npz('result/model.npz', model.to_cpu())
def create_agent(env, config, from_path=None):
    n_actions = env.action_space.n

    # Initialize Q-network for predicting action values
    q_func = SingleModelStateQFunctionWithDiscreteAction(
        CustomModel(n_actions))
    if config['gpu_id'] != -1:
        q_func = q_func.to_gpu(config['gpu_id'])

    # Use Adam to optimize q_func. eps=1e-2 is for stability.
    optimizer = Adam(eps=config['epsilon'], amsgrad=True,
                     alpha=config['learning_rate'])
    optimizer.setup(q_func)

    # Use epsilon-greedy for exploration
    explorer = LinearDecayEpsilonGreedy(
        start_epsilon=config['start_epsilon'],
        end_epsilon=config['end_epsilon'],
        decay_steps=config['decay_steps'],
        random_action_func=env.action_space.sample)

    # DQN uses Experience Replay. Specify a replay buffer and its capacity.
    replay_buffer = EpisodicReplayBuffer(
        capacity=config['replay_buffer_capacity'])

    # Now create an agent that will interact with the environment.
    agent = chainerrl.agents.DQN(
        q_func, optimizer, replay_buffer, config['gamma'], explorer,
        gpu=config['gpu_id'],
        replay_start_size=config['replay_start_size'],
        update_interval=config['update_interval'],
        target_update_interval=config['target_update_interval'])

    if from_path is not None:
        agent.load(from_path)

    return agent
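# Illustrative config for create_agent() above. The keys are the ones the
# function looks up; the values are placeholders, not tuned hyperparameters,
# and `env` is assumed to be a Gym-style environment.
config = {
    'gpu_id': -1,                      # -1 keeps everything on the CPU
    'epsilon': 1e-2,                   # Adam eps (1e-2 for stability, as noted above)
    'learning_rate': 1e-3,             # Adam alpha
    'start_epsilon': 1.0,
    'end_epsilon': 0.1,
    'decay_steps': 10000,
    'replay_buffer_capacity': 10 ** 5,
    'gamma': 0.99,
    'replay_start_size': 500,
    'update_interval': 1,
    'target_update_interval': 100,
}
agent = create_agent(env, config)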
def __init__(self, action_space, model, optimizer=Adam()):
    self.action_space = action_space

    self.model = model
    self.optimizer = optimizer
    self.optimizer.setup(self.model)

    # monitor score and reward
    self.rewards = []
    self.scores = []
def get_ddqn(env):
    rda = RandomAgent(env)
    q_func = QFunction()
    opt = Adam(alpha=1e-3)
    opt.setup(q_func)
    opt.add_hook(optim.GradientClipping(1.0), 'hook')
    rbuf = PrioritizedEpisodicReplayBuffer(5 * 10**5)
    explorer = LinearDecayEpsilonGreedy(start_epsilon=1.0,
                                        end_epsilon=0.3,
                                        decay_steps=10000,
                                        random_action_func=rda.random_action)
    agent = DoubleDQN(q_func, opt, rbuf,
                      gamma=0.995,
                      explorer=explorer,
                      replay_start_size=500,
                      target_update_interval=1,
                      target_update_method='soft',
                      update_interval=4,
                      soft_update_tau=1e-2,
                      n_times_update=1,
                      gpu=0,
                      minibatch_size=128)
    return agent
def run_linear_network(loss_fn, alpha=0.3, batch_size=2):
    # Get data
    np.random.seed(42)
    dataset = get_dataset()
    iterator = SerialIterator(dataset, batch_size, repeat=True, shuffle=True)

    # Set up network and loss
    predictor = L.Linear(None, 1)
    ranker = Ranker(predictor)
    loss = Loss(ranker, loss_fn)

    # Optimizer
    optimizer = Adam(alpha=alpha)
    optimizer.setup(loss)

    updater = StandardUpdater(iterator, optimizer, converter=zeropad_concat)
    trainer = Trainer(updater, (100, 'epoch'))
    log_report = extensions.LogReport(log_name=None)
    trainer.extend(log_report)
    np.random.seed(42)
    trainer.run()

    last_ndcg = log_report.log[-1]['ndcg']
    return last_ndcg
def __init__(self, n_words, emb_size, n_hidden, n_classes, classes):
    self.model = chainer.FunctionSet(
        Emb=F.EmbedID(n_words, emb_size),
        W=F.Linear(emb_size, n_hidden),
        U=F.Linear(n_hidden, n_hidden),
        O=F.Linear(n_hidden, n_classes)
    )
    self.n_hidden = n_hidden
    self.n_clsses = n_classes
    self.emb_size = emb_size
    self.classes = classes
    self.classes_rev = {v: k for k, v in classes.iteritems()}

    for param in self.model.parameters:
        param[:] = np.random.randn(*param.shape) * 0.1

    self.optimizer = Adam(alpha=0.001, beta1=0.9, beta2=0.999, eps=1e-8)
    self.optimizer.setup(self.model)
def _setup_optimizer(config, model, comm):
    optimizer_name = config['optimizer']
    lr = float(config['init_lr'])
    weight_decay = float(config['weight_decay'])
    if optimizer_name == 'Adam':
        optimizer = Adam(alpha=lr, weight_decay_rate=weight_decay)
    elif optimizer_name in \
            ('SGD', 'MomentumSGD', 'CorrectedMomentumSGD', 'RMSprop'):
        optimizer = eval(optimizer_name)(lr=lr)
        if weight_decay > 0.:
            optimizer.add_hook(WeightDecay(weight_decay))
    else:
        raise ValueError('Invalid optimizer: {}'.format(optimizer_name))
    if comm is not None:
        optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    optimizer.setup(model)
    return optimizer
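# Minimal usage sketch for _setup_optimizer() above: the config keys match
# the ones it reads, the values are illustrative, and comm=None skips the
# ChainerMN multi-node wrapping. `model` is a placeholder link.
config = {
    'optimizer': 'Adam',
    'init_lr': 1e-3,
    'weight_decay': 1e-4,  # used as weight_decay_rate when Adam is selected
}
optimizer = _setup_optimizer(config, model, comm=None)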
def test_update_core():
    np.random.seed(0)
    n_leaf = 8
    n_tree = 4
    n_batch = 10
    x = np.random.randint(0, n_leaf, size=(n_batch, n_tree))
    x += np.tile(np.arange(0, n_tree) * n_leaf, (n_batch, 1))
    y = np.random.randint(0, 2, size=(n_batch))
    train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=2,
                                                        random_state=0)

    train_dset = TupleDataset(train_x, train_y)
    train_iter = SerialIterator(train_dset, 2)
    test_dset = test_x
    test_iter = SerialIterator(test_dset, 2)

    model = MeanTeacherChain(Model(n_tree * n_leaf))
    opt = Adam().setup(model)

    with chainer.using_config('enable_backprop', False):
        before_loss = model.teacher.forward(train_x, train_y)

    updater = MeanTeacherUpdater(
        iterator={
            'train': train_iter,
            'test': test_iter,
        },
        optimizer=opt,
        device=-1,
    )
    updater.update()

    with chainer.using_config('enable_backprop', False):
        after_loss = model.teacher.forward(train_x, train_y)

    assert before_loss.array > after_loss.array
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--out', default='result')
    parser.add_argument('--n-epoch', type=int, default=30)
    parser.add_argument('--gamma', type=float, default=1e-2)
    parser.add_argument('--device', type=int, default=-1)
    args = parser.parse_args()

    dset = create_dataset(figpath=Path(args.out) / 'train.png')
    iterator = SerialIterator(dset, batch_size=512, shuffle=True, repeat=True)
    model = Model()
    opt = Adam(alpha=args.gamma).setup(model)
    initializer = UniformInitializer((512, 2))
    sampler = LangevinSampler(initializer)
    updater = MCMCMLLUpdater(sampler, iterator, opt, device=args.device)
    trainer = Trainer(updater, (args.n_epoch, 'epoch'))
    trainer.extend(plot_sample(sampler))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.observe_lr())
    trainer.extend(
        extensions.PrintReport(
            ['epoch', 'main/loss', 'main/logq', 'main/logp']))
    trainer.run()
from chainer.functions import expand_dims
from random import shuffle

my_model = Deepimpression()

load_model = True
if load_model:
    p = os.path.join(P.MODELS, 'epoch_89_33')
    chainer.serializers.load_npz(p, my_model)
    print('model loaded')
    continuefrom = 0
else:
    continuefrom = 0

# optimizer = Adam(alpha=0.0002, beta1=0.5, beta2=0.999, eps=10e-8, weight_decay_rate=0.0001)
my_optimizer = Adam(alpha=0.0002, beta1=0.5, beta2=0.999, eps=10e-8)
my_optimizer.setup(my_model)

if C.ON_GPU:
    my_model = my_model.to_gpu(device=C.DEVICE)

print('Initializing')
print('model initialized with %d parameters' % my_model.count_params())

# epochs = C.EPOCHS
epochs = 1

train_labels = h5.File(P.CHALEARN_TRAIN_LABELS_20, 'r')
val_labels = h5.File(P.CHALEARN_VAL_LABELS_20, 'r')
test_labels = h5.File(P.CHALEARN_TEST_LABELS_20, 'r')
class QNeuralNetwork(QModel):
    def __init__(self, model, target, device_id=-1,
                 learning_rate=0.00025, momentum=.9,
                 minibatch_size=32, update_interval=10000):

        assert isinstance(model, ChainerModel), \
            'model should inherit from ChainerModel'

        super(QNeuralNetwork, self).__init__(model.input_shape,
                                             model.output_shape)

        self._gpu_device = None
        self._loss_val = 0

        # Target model update method
        self._steps = 0
        self._target_update_interval = update_interval

        # Setup model and target network
        self._minibatch_size = minibatch_size
        self._model = model
        self._target = target
        self._target.copyparams(self._model)

        # If GPU move to GPU memory
        if device_id >= 0:
            with cuda.get_device(device_id) as device:
                self._gpu_device = device
                self._model.to_gpu(device)
                self._target.to_gpu(device)

        # Setup optimizer
        self._optimizer = Adam(learning_rate, momentum, 0.999)
        self._optimizer.setup(self._model)

    def evaluate(self, environment, model=QModel.ACTION_VALUE_NETWORK):
        if check_rank(environment.shape, get_rank(self._input_shape)):
            environment = environment.reshape((1,) + environment.shape)

        # Move data if necessary
        if self._gpu_device is not None:
            environment = cuda.to_gpu(environment, self._gpu_device)

        if model == QModel.ACTION_VALUE_NETWORK:
            output = self._model(environment)
        else:
            output = self._target(environment)

        return cuda.to_cpu(output.data)

    def train(self, x, y, actions=None):
        actions = actions.astype(np.int32)
        batch_size = len(actions)

        if self._gpu_device:
            x = cuda.to_gpu(x, self._gpu_device)
            y = cuda.to_gpu(y, self._gpu_device)
            actions = cuda.to_gpu(actions, self._gpu_device)

        q = self._model(x)
        q_subset = F.reshape(F.select_item(q, actions), (batch_size, 1))
        y = y.reshape(batch_size, 1)

        loss = F.sum(F.huber_loss(q_subset, y, 1.0))

        self._model.cleargrads()
        loss.backward()
        self._optimizer.update()

        self._loss_val = np.asscalar(cuda.to_cpu(loss.data))

        # Keeps track of the number of train() calls
        self._steps += 1
        if self._steps % self._target_update_interval == 0:
            # copy weights
            self._target.copyparams(self._model)

    @property
    def loss_val(self):
        return self._loss_val  # / self._minibatch_size

    def save(self, output_file):
        save_npz(output_file, self._model)

    def load(self, input_file):
        load_npz(input_file, self._model)
        # Copy parameter from model to target
        self._target.copyparams(self._model)
my_model = Triplet()

if load_model:
    m_num = 3
    e_num = 9
    ep = 85
    models_path = '/scratch/users/gabras/data/omg_empathy/saving_data/models'
    p = os.path.join(models_path, 'model_%d_experiment_%d' % (m_num, e_num),
                     'epoch_%d' % ep)
    chainer.serializers.load_npz(p, my_model)
else:
    ep = -1

my_optimizer = Adam(alpha=0.0002, beta1=0.5, beta2=0.999, eps=10e-8,
                    weight_decay_rate=0.0001)
# my_optimizer = Adam(alpha=0.0002, beta1=0.5, beta2=0.999, eps=10e-8)
my_optimizer.setup(my_model)

if C.ON_GPU:
    my_model = my_model.to_gpu(device=C.DEVICE)

print('Initializing')
print('model initialized with %d parameters' % my_model.count_params())

# --------------------------------------------------------------------------------------------
DEBUG = False
# --------------------------------------------------------------------------------------------

if DEBUG:
else:
    raise RuntimeError('Invalid dataset choice.')

loss = 0
gen_loss = []
dis_loss = []

n_train_data = len(train_x)

# Hyperparameters
epochs = 1
batch_size = 100
n_hidden = 100

# Generator
generator = Generator(n_hidden=n_hidden)
opt_gen = Adam()
opt_gen.setup(generator)
opt_gen.add_hook(GradientClipping(5))
loss_gen = 0

# Discriminator
discriminator = Discriminator()
opt_dis = Adam()
opt_dis.setup(discriminator)
opt_dis.add_hook(GradientClipping(5))
loss_dis = 0

# time
start_at = time.time()
cur_at = start_at
from chainercv.links.model.resnet import ResNet50
from models.arcface import ArcFace
from paired_image_dataset import PairedImageSet

chainer.config.cv_resize_backend = 'cv2'

if __name__ == "__main__":
    photo_path = sorted(Path('photos').glob('*'))
    sketch_path = sorted(Path('sketches').glob('*'))
    pair_list = [[str(i), str(j)] for i, j in zip(photo_path, sketch_path)]

    img_size = (200, 250)
    dataset = PairedImageSet(pair_list, '', img_size, False, np.float32)
    iter_train = MultiprocessIterator(dataset, 5, n_processes=2)

    adam = Adam(alpha=0.002, beta1=0.0, beta2=0.9)

    resnet = ResNet50(pretrained_model='imagenet')
    fc_dim = 500
    resnet.fc6 = L.Linear(None, fc_dim)  # change the fc layer output size to 500

    temp = 30
    margin = 0.5
    arcface = ArcFace(temp, margin, resnet)
    adam.setup(arcface)

    updater = StandardUpdater(iter_train, adam)
    trainer = Trainer(updater, (1000, 'iteration'))
    trainer.run()
print("train_x: {}, train_y: {}, vocab: {}".format(len(train_x), len(train_y), len(vocab))) loss = 0 average_loss = [] accuracy_list = [] epochs = 50 batch_size = 128 num_data = len(train_x) model = Seq2Seq( vocab_size=len(vocab), embed_size=512, hidden_size=512, ) optimizer = Adam() optimizer.setup(model) optimizer.add_hook(GradientClipping(5)) for c, i in vocab.items(): tmp_vocab[i] = c # timer start_at = time.time() cur_at = start_at # エポックを回す for epoch in range(0, epochs): print('EPOCH: {}/{}'.format(epoch+1, epochs)) perm = np.random.permutation(num_data) # ランダムサンプリング
class RNN(object):
    def __init__(self, n_words, emb_size, n_hidden, n_classes, classes):
        self.model = chainer.FunctionSet(
            Emb=F.EmbedID(n_words, emb_size),
            W=F.Linear(emb_size, n_hidden),
            U=F.Linear(n_hidden, n_hidden),
            O=F.Linear(n_hidden, n_classes)
        )
        self.n_hidden = n_hidden
        self.n_clsses = n_classes
        self.emb_size = emb_size
        self.classes = classes
        self.classes_rev = {v: k for k, v in classes.iteritems()}

        for param in self.model.parameters:
            param[:] = np.random.randn(*param.shape) * 0.1

        self.optimizer = Adam(alpha=0.001, beta1=0.9, beta2=0.999, eps=1e-8)
        self.optimizer.setup(self.model)

    def forward_loss(self, mb_x, mb_y, train=True):
        mb_size = mb_x.shape[0]
        n_steps = mb_x.shape[1]

        loss = 0.0
        h = chainer.Variable(np.zeros((mb_size, self.n_hidden), dtype='float32'),
                             volatile=not train)
        y_hat = []
        for i in range(n_steps):
            x_i = chainer.Variable(mb_x[:, i], volatile=not train)
            y_i = chainer.Variable(mb_y[:, i], volatile=not train)

            h = self.model.W(self.model.Emb(x_i)) + self.model.U(h)
            out = self.model.O(h)

            curr_loss = F.softmax_cross_entropy(out, y_i)
            y_hat.append(curr_loss.creator.y)
            loss += curr_loss * 1.0 / (n_steps * mb_size)

        y_hat = np.array(y_hat).swapaxes(0, 1)

        return loss, y_hat

    def learn(self, x, y):
        self.optimizer.zero_grads()
        loss, y_hat = self.forward_loss(x, y, train=True)
        loss.backward()
        self.optimizer.update()

        return loss.data

    def predict(self, x):
        _, y_hat = self.forward_loss(x, np.zeros(x.shape, dtype='int32'))
        return np.argmax(y_hat, axis=2)

    def predictions_to_text(self, y):
        return [self.classes_rev.get(i, '#EOS') for i in y]

    def eval(self, mb_x, mb_y):
        mb_y_hat = self.predict(mb_x)
        t = self.predictions_to_text

        acc = sklearn.metrics.accuracy_score(mb_y.flat[mb_y.flat != -1],
                                             mb_y_hat.flat[mb_y.flat != -1])
        prec = sklearn.metrics.precision_score(mb_y.flat[mb_y.flat != -1],
                                               mb_y_hat.flat[mb_y.flat != -1])
        recall = sklearn.metrics.recall_score(mb_y.flat[mb_y.flat != -1],
                                              mb_y_hat.flat[mb_y.flat != -1])
        report = sklearn.metrics.classification_report(
            t(mb_y.flat[mb_y.flat != -1]),
            t(mb_y_hat.flat[mb_y.flat != -1]))

        return acc, prec, recall, report, mb_y_hat
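# Hypothetical construction of the RNN wrapper above (Chainer v1 / Python 2
# era API, given FunctionSet and iteritems). `classes` maps label strings to
# integer ids, as implied by classes_rev; the sizes are placeholders.
classes = {'O': 0, 'B-ORG': 1, 'I-ORG': 2}
rnn = RNN(n_words=10000, emb_size=64, n_hidden=128,
          n_classes=len(classes), classes=classes)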
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of examples in each mini-batch')
    parser.add_argument('--bproplen', '-l', type=int, default=200,
                        help='Number of words in each mini-batch '
                             '(= length of truncated BPTT)')
    parser.add_argument('--epoch', '-e', type=int, default=40,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--file', default="enwik8",
                        help='path to text file for training')
    parser.add_argument('--unit', '-u', type=int, default=2800,
                        help='Number of LSTM units')
    parser.add_argument('--embd', type=int, default=400,
                        help='Number of embedding units')
    parser.add_argument('--hdrop', type=float, default=0.2,
                        help='hidden state dropout (variational)')
    parser.add_argument('--edrop', type=float, default=0.5,
                        help='embedding dropout')
    args = parser.parse_args()

    nembd = args.embd
    # number of training iterations per model save, log write, and
    # validation set evaluation
    interval = 100
    pdrop = args.hdrop
    pdrope = args.edrop
    # initial learning rate
    alpha0 = .001
    # inverse of linear decay rate towards 0
    dec_it = 12 * 9000
    # minimum learning rate
    alpha_min = .00007
    # first ntrain words of dataset will be used for training
    ntrain = 90000000

    seqlen = args.bproplen
    nbatch = args.batchsize
    filename = args.file

    text, mapping = get_char(filename)
    sequence = np.array(text).astype(np.int32)

    itrain = sequence[0:ntrain]
    ttrain = sequence[1:ntrain + 1]
    fullseql = int(ntrain / nbatch)
    itrain = itrain.reshape(nbatch, fullseql)
    ttrain = ttrain.reshape(nbatch, fullseql)

    # doesn't use the full validation set
    nval = 500000
    ival = sequence[ntrain:ntrain + nval]
    tval = sequence[ntrain + 1:ntrain + nval + 1]
    ival = ival.reshape(ival.shape[0] // 1000, 1000)
    tval = tval.reshape(tval.shape[0] // 1000, 1000)
    # test = sequence[ntrain+nval:ntrain+nval+ntest]

    nvocab = max(sequence) + 1  # train is just an array of integers
    print('#vocab =', nvocab)

    # Prepare an RNNLM model
    rnn = RNNForLM(nvocab, args.unit, args.embd)
    model = L.Classifier(rnn)
    model.compute_accuracy = False  # we only want the perplexity
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # make the GPU current
        model.to_gpu()

    # Set up an optimizer
    optimizer = Adam(alpha=alpha0)
    optimizer.setup(model)

    resultdir = args.out
    print('starting')
    nepoch = args.epoch
    start = 0
    loss_sum = 0
    if not os.path.isdir(resultdir):
        os.mkdir(resultdir)

    vloss = test(rnn, ival, tval)
    vloss = (1.4427 * vloss)
    f = open(os.path.join(resultdir, 'log'), 'w')
    outstring = "Initial Validation loss (bits/word): " + str(vloss) + '\n'
    f.write(outstring)
    f.close()

    i = 0
    epoch_num = 0
    it_num = 0
    while True:
        # Get the result of the forward pass.
        fin = start + seqlen
        if fin > (itrain.shape[1]):
            start = 0
            fin = start + seqlen
            epoch_num = epoch_num + 1
            if epoch_num == nepoch:
                break
        inputs = itrain[:, start:fin]
        targets = ttrain[:, start:fin]
        start = fin

        inputs = Variable(inputs)
        targets = Variable(targets)
        targets.to_gpu()
        inputs.to_gpu()
        it_num += 1

        loss = 0
        rnn.applyWN()

        # make hidden dropout mask
        mask = cp.zeros((inputs.shape[0], args.unit), dtype=cp.float32)
        ind = cp.nonzero(cp.random.rand(inputs.shape[0], args.unit) > pdrop)
        mask[ind] = 1 / (1 - pdrop)

        # make embedding dropout mask
        mask2 = cp.zeros((inputs.shape[0], nembd), dtype=cp.float32)
        ind = cp.nonzero(cp.random.rand(inputs.shape[0], nembd) > pdrope)
        mask2[ind] = 1 / (1 - pdrope)

        for j in range(seqlen):
            output = rnn(inputs[:, j], mask, mask2)
            loss = loss + F.softmax_cross_entropy(output, targets[:, j])
        loss = loss / (seqlen)

        # Zero all gradients before updating them.
        rnn.zerograds()
        loss_sum += loss.data

        # Calculate and update all gradients.
        loss.backward()
        s = 0
        # Use the optimizer to move all parameters of the network
        # to values which will reduce the loss.
        optimizer.update()

        # decays learning rate linearly
        optimizer.alpha = alpha0 * (dec_it - it_num) / float(dec_it)
        # prevents learning rate from going below minimum
        if optimizer.alpha < alpha_min:
            optimizer.alpha = alpha_min

        loss.unchain_backward()

        if ((i + 1) % interval) == 0:
            rnn.reset_state()
            vloss = test(rnn, ival, tval)
            # converts to binary entropy
            vloss = (1.4427 * vloss)
            loss_sum = (1.4427 * loss_sum / interval)
            serializers.save_npz(os.path.join(resultdir, 'model'), rnn)
            outstring = ("Training iteration: " + str(i + 1) +
                         " Training loss (bits/char): " + str(loss_sum) +
                         " Validation loss (bits/word): " + str(vloss) + '\n')
            f = open(os.path.join(resultdir, 'log'), 'a')
            f.write(outstring)
            f.close()
            print("Training iteration: " + str(i + 1))
            print('training loss: ' + str(loss_sum))
            print('validation loss: ' + str(vloss))
            loss_sum = 0
        i += 1
class Model(Chain):  # assumed header; the class definition is not shown in this excerpt
    def __call__(self, x):
        return F.sum(x)

    def to_gpu(self, device=None):
        super(Model, self).to_gpu(device)


class Dataset(DatasetMixin):
    def __init__(self):
        super(Dataset, self).__init__()

    def __len__(self):
        return 1024

    def get_example(self, i):
        return np.array([[1, 2], [3, 4]], dtype=np.float32)


dataset = Dataset()
iterator = SerialIterator(dataset, 2, False, False)

model_training = Model()
model_training.to_gpu()

optimizer = Adam()
optimizer.setup(model_training)

updater = StandardUpdater(iterator, optimizer, device=0)
trainer = Trainer(updater, stop_trigger=[1, "iteration"])
trainer.extend(snapshot_object(model_training, "model_iter_{.updater.iteration}"),
               trigger=[1, "iteration"])
trainer.run()

model_test = Model()
load_npz("result/model_iter_1", model_test)
model_test.to_gpu()
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--device', type=int, default=0, help='gpu id')
    parser.add_argument('--modelfile', help='pretrained model file of FCN8',
                        required=True)
    parser.add_argument('--lr', type=float, default=5e-5,
                        help='init learning rate')
    parser.add_argument('--name', type=str, default='FCN8_SEG',
                        help='name of the experiment')
    parser.add_argument('--resume', type=bool, default=False,
                        help='resume training or not')
    parser.add_argument('--snapshot', type=str,
                        help='snapshot file to resume from')
    parser.add_argument('--lambda1', default=1, type=float, help='lambda1 param')
    parser.add_argument('--lambda2', default=1, type=float, help='lambda2 param')
    parser.add_argument('--lambda3', default=1.5, type=float, help='lambda3 param')
    # total_loss = self.lambd1 * cl_loss + self.lambd2 * am_loss + self.lambd3 * segment_loss

    args = parser.parse_args()

    resume = args.resume
    device = args.device

    if resume:
        load_snapshot_path = args.snapshot
        load_model_path = args.modelfile
        print("Resuming from model {}, snapshot {}".format(load_model_path,
                                                           load_snapshot_path))
    else:
        pretrained_model_path = args.modelfile

    experiment = args.name
    lr = args.lr
    optim = Adam
    training_interval = (20000, 'iteration')
    snapshot_interval = (1000, 'iteration')
    lambd1 = args.lambda1
    lambd2 = args.lambda2
    lambd3 = args.lambda3
    updtr = VOC_SEG_Updater_v2

    os.makedirs('result/' + experiment, exist_ok=True)
    f = open('result/' + experiment + '/details.txt', "w+")
    f.write("lr - " + str(lr) + "\n")
    f.write("optimizer - " + str(optim) + "\n")
    f.write("lambd1 - " + str(lambd1) + "\n")
    f.write("lambd2 - " + str(lambd2) + "\n")
    f.write("lambd3 - " + str(lambd3) + "\n")
    f.write("training_interval - " + str(training_interval) + "\n")
    f.write("Updater - " + str(updtr) + "\n")
    f.close()

    if resume:
        model = FCN8s_hand()
        chainer.serializers.load_npz(load_model_path, model)
    else:
        model = FCN8s_hand()
        chainer.serializers.load_npz(pretrained_model_path, model)

    if device >= 0:
        model.to_gpu(device)

    dataset = MyTrainingDataset()
    iterator = SerialIterator(dataset, 1, shuffle=False)
    optimizer = Adam(alpha=lr)
    optimizer.setup(model)

    updater = updtr(iterator, optimizer, device=device,
                    lambd1=lambd1, lambd2=lambd2)
    trainer = Trainer(updater, training_interval)

    log_keys = ['epoch', 'iteration', 'main/SG_Loss', 'main/TotalLoss']
    trainer.extend(extensions.LogReport(log_keys, (10, 'iteration'),
                                        log_name='log' + experiment))
    trainer.extend(extensions.PrintReport(log_keys), trigger=(100, 'iteration'))
    trainer.extend(extensions.ProgressBar(training_length=training_interval,
                                          update_interval=100))
    trainer.extend(
        extensions.snapshot(filename=experiment + '_snapshot_{.updater.iteration}'),
        trigger=snapshot_interval)
    trainer.extend(
        extensions.snapshot_object(trainer.updater._optimizers['main'].target,
                                   experiment + '_model_{.updater.iteration}'),
        trigger=snapshot_interval)
    trainer.extend(
        extensions.PlotReport(['main/SG_Loss'], 'iteration', (20, 'iteration'),
                              file_name=experiment + '/sg_loss.png',
                              grid=True, marker=" "))
    trainer.extend(
        extensions.PlotReport(['main/TotalLoss'], 'iteration', (20, 'iteration'),
                              file_name=experiment + '/total_loss.png',
                              grid=True, marker=" "))
    trainer.extend(
        extensions.PlotReport(log_keys[2:], 'iteration', (20, 'iteration'),
                              file_name=experiment + '/all_loss.png',
                              grid=True, marker=" "))

    if resume:
        chainer.serializers.load_npz(load_snapshot_path, trainer)

    print("Running - - ", experiment)
    print('initial lr ', lr)
    print('optimizer ', optim)
    print('lambd1 ', lambd1)
    print('lambd2 ', lambd2)
    print('lambd3', lambd3)
    trainer.run()
def train_step(fname_train, fname_label, fname_model, N_train, N_test,
               N_epoch, batchsize, hgh, wid, mode):
    print("Start training.")
    print("train:", fname_train, "label", fname_label)
    print("N_train:", N_train, "N_test:", N_test, "hgh:", hgh, "wid:", wid)

    cim = ImClass('train', fname_train=fname_train, fname_label=fname_label,
                  N_train=N_train, N_test=N_test, hgh=hgh, wid=wid, mode=mode)

    model = FCN()
    optimizer = Adam()
    optimizer.setup(model)

    # Learning loop
    for epoch in range(1, N_epoch + 1):
        print("epoch:", epoch, "/", N_epoch)

        # training
        sum_loss_training, sum_acc_training = 0.0, 0.0
        for i in range(0, N_train, batchsize):
            train_loss_tmp, train_acc_tmp = training_epoch(
                i, cim, model, optimizer, batchsize)
            sum_loss_training += float(train_loss_tmp) * batchsize
            sum_acc_training += float(train_acc_tmp) * batchsize
            if i == 0 or (i + batchsize) % 5000 == 0:
                print("training:", i + batchsize, "/", N_train,
                      "loss:", "{:.3f}".format(float(train_loss_tmp)),
                      "acc:", "{:.3f}".format(float(train_acc_tmp)))
        train_loss = sum_loss_training / N_train
        train_acc = sum_acc_training / N_train

        # testing
        if N_test != 0:
            sum_acc_testing = 0.0
            for i in range(0, N_test, batchsize):
                test_acc_tmp = testing_epoch(i, cim, model, batchsize)
                sum_acc_testing += float(test_acc_tmp) * batchsize
                if (i + batchsize) % 1000 == 0:
                    print("testing:", i + batchsize, "/", N_test,
                          "acc:", "{:.3f}".format(float(test_acc_tmp)))
            test_acc = sum_acc_testing / N_test
            print("Result", "\n",
                  "train_loss:", "{:.3f}".format(train_loss), "\n",
                  "train_acc:", "{:.3f}".format(train_acc), "\n",
                  "test_acc:", "{:.3f}".format(test_acc))
        else:
            test_acc = 0.0

    data_model = {}
    data_model['model'] = model
    data_model['shape'] = (hgh, wid)
    data_model['testacc'] = test_acc

    if os.path.isfile(fname_model):
        warnings.warn("File is being overwritten: {}.".format(fname_model))
    with open(fname_model, 'wb') as p:
        pickle.dump(data_model, p, -1)
    print("Done training.")
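# Sketch of reading back the pickle written by train_step() above; the keys
# ('model', 'shape', 'testacc') are the ones stored there, while 'model.pkl'
# stands in for fname_model.
import pickle

with open('model.pkl', 'rb') as p:
    data_model = pickle.load(p)
model = data_model['model']
hgh, wid = data_model['shape']
print('test accuracy:', data_model['testacc'])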
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--device', type=int, default=-1, help='gpu id')
    parser.add_argument('--lr_init', type=float, default=1 * 1e-7,
                        help='init learning rate')
    # parser.add_argument('--lr_trigger', type=float, default=5,
    #                     help='trigger to decreace learning rate')
    # parser.add_argument('--lr_target', type=float, default=5*1e-5,
    #                     help='target learning rate')
    # parser.add_argument('--lr_factor', type=float, default=.75,
    #                     help='decay factor')
    parser.add_argument('--name', type=str, default='classifier_gain_dropout',
                        help='name of the experiment')
    parser.add_argument(
        '--modelfile', type=str,
        help='name of the model to resume from or if starting anew, the '
             'pretrained FCN8s_Hand model with empty final layers',
        required=True)
    parser.add_argument('--resume', type=bool, default=False,
                        help='resume training or not')
    parser.add_argument('--snapshot', type=str, default=None,
                        help='snapshot file of the trainer to resume from')
    args = parser.parse_args()
    if args.resume:
        assert args.snapshot is not None

    resume = args.resume
    device = args.device
    # os.environ["CUDA_VISIBLE_DEVICES"] = str(device)

    if resume:
        load_snapshot_path = args.snapshot

    experiment = args.name
    lr_init = args.lr_init
    # lr_target = args.lr_target
    # lr_factor = args.lr_factor
    # lr_trigger_interval = (args.lr_trigger, 'epoch')

    os.makedirs('result/' + experiment, exist_ok=True)
    f = open('result/' + experiment + '/details.txt', "w+")
    f.write("lr - " + str(lr_init) + "\n")
    f.write("optimizer - " + str(Adam))
    # f.write("lr_trigger_interval - "+str(lr_trigger_interval)+"\n")
    f.close()

    # if resume:
    model_own = FCN8s_hand()
    chainer.serializers.load_npz(args.modelfile, model_own)

    if device >= 0:
        print('sending model to gpu ' + str(device))
        model_own.to_gpu(device)

    dataset = MyTrainingDataset()
    iterator = SerialIterator(dataset, 1)
    optimizer = Adam(alpha=lr_init)
    optimizer.setup(model_own)

    updater = VOC_ClassificationUpdater_v2(iterator, optimizer, device=device,
                                           dropout=0.5)
    trainer = Trainer(updater, (100, 'epoch'))

    log_keys = ['epoch', 'iteration', 'main/Loss']
    trainer.extend(
        extensions.LogReport(log_keys, (100, 'iteration'),
                             log_name='log_' + experiment))
    trainer.extend(extensions.PrintReport(log_keys), trigger=(100, 'iteration'))
    trainer.extend(
        extensions.snapshot(filename=experiment + "_snapshot_{.updater.iteration}"),
        trigger=(1, 'epoch'))
    trainer.extend(
        extensions.snapshot_object(trainer.updater._optimizers['main'].target,
                                   experiment + "_model_{.updater.iteration}"),
        trigger=(1, 'epoch'))
    trainer.extend(
        extensions.PlotReport(['main/Loss'], 'iteration', (100, 'iteration'),
                              file_name=experiment + '/loss.png',
                              grid=True, marker=" "))
    # trainer.extend(extensions.ExponentialShift('lr', lr_factor, target=lr_target),
    #                trigger=lr_trigger_interval)

    if resume:
        chainer.serializers.load_npz(load_snapshot_path, trainer)

    print("Running - - ", experiment)
    print('initial lr ', lr_init)
    # print('lr_trigger_interval ', lr_trigger_interval)
    trainer.run()