def compile_net(net):
    """Wrap *net* in a TrainOneStepCell driven by Adam and compile it.

    Compilation uses the module-level sample inputs ``_x`` and ``_b``.
    """
    # NOTE(review): weight_decay=0.9 is unusually large — presumably
    # intentional for this compile-only test; confirm before reusing.
    opt = Adam(net.trainable_params(),
               learning_rate=0.1,
               loss_scale=1024.0,
               weight_decay=0.9)
    step_cell = TrainOneStepCell(net, opt)
    _executor.compile(step_cell, _x, _b)
def compile_net(net, shape):
    """Compile *net* for training against three all-ones inputs of *shape*.

    The inputs are (int32, float32, int32) tensors.  The auto-parallel
    context is reset afterwards so subsequent tests start from a clean state.
    """
    first = Tensor(np.ones(shape), dtype=ms.int32)
    second = Tensor(np.ones(shape), dtype=ms.float32)
    third = Tensor(np.ones(shape), dtype=ms.int32)
    train_net = TrainOneStepCell(net, Adam(net.trainable_params(), learning_rate=0.1))
    train_net.set_auto_parallel()
    train_net.set_train()
    _executor.compile(train_net, first, second, third)
    context.reset_auto_parallel_context()
def part_ps_impl(self, dataset):
    """Train a Menet with a part-parameter-server placement, then predict.

    The embedding lookup is hosted on the parameter server while the conv
    ops are pinned to CPU.  Returns the model's prediction on
    ``self.input_np`` as a numpy array.
    """
    net = Menet(self.in_channels, self.out_channels, self.kernel_size,
                self.vocab_size, self.embedding_size, self.output_channels,
                self.target, self.sparse)
    # Host the embedding table on the parameter server.
    net.embedding_lookup.set_param_ps()
    # Pin the convolution ops to CPU execution.
    net.conv.conv2d.add_prim_attr('primitive_target', 'CPU')
    net.conv.bias_add.add_prim_attr('primitive_target', 'CPU')
    net.set_train()
    criterion = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    optimizer = Adam(params=filter(lambda x: x.requires_grad, net.get_parameters()))
    optimizer.target = 'CPU'
    model = Model(net, criterion, optimizer)
    model.train(self.epoch_size, dataset, dataset_sink_mode=False)
    prediction = model.predict(Tensor(self.input_np))
    return prediction.asnumpy()
def _model_train_and_save_ckpt(self, net, dataset, epoch):
    """Train *net* for *epoch* epochs with per-rank checkpointing.

    Returns the parameter dict loaded from the newest checkpoint file
    written during this run.
    """
    self.opt = Adam(params=net.get_parameters())
    if self.target == 'CPU':
        self.opt.target = self.target
    if self.sparse:
        context.set_context(enable_sparse=True)
    self.model = Model(network=net, loss_fn=self.loss_fn, optimizer=self.opt)
    ckpt_dir = './rank_{}_ckpt'.format(self.global_rank_id)
    callback = ModelCheckpoint(prefix='parallel',
                               directory=ckpt_dir,
                               config=CheckpointConfig(keep_checkpoint_max=1))
    # Remove stale files so the newest-checkpoint lookup sees only this run.
    clean_all_ckpt_files(ckpt_dir)
    self.model.train(epoch=epoch, train_dataset=dataset,
                     callbacks=[callback], dataset_sink_mode=False)
    return load_checkpoint(find_newest_ckpt_file(ckpt_dir))
# Backbone parameters (everything outside ``ignored_params``) train at a
# reduced learning rate; the task heads (bottleneck / classifier / wpa)
# train at the full rate.
base_params = (p for p in net.get_parameters() if id(p) not in ignored_params)

head_groups = [
    {'params': module.get_parameters(), 'lr': args.lr}
    for module in (net.bottleneck, net.classifier, net.wpa)
]
optimizer_P = Adam(
    [{'params': base_params, 'lr': 0.1 * args.lr}] + head_groups,
    learning_rate=args.lr,
    weight_decay=5e-4)

# --------------------------------------------------------------------------
# Start Training