def test_big_batchSize_with_new_interface(num_classes=10, epoch=8, batch_size=338):
    net = resnet50(num_classes)
    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    net_with_criterion = WithLossCell(net, criterion)
    net_with_criterion.set_train()

    weights = ParameterTuple(filter(lambda x: x.requires_grad, net.get_parameters()))
    optimizer = Momentum(weights, 0.1, 0.9)

    # sens is fixed at construction time, so no scaling tensor is passed per call.
    train_network = ForwardValueAndGrad(network=net_with_criterion, weights=weights,
                                        get_by_list=True, sens_param=True, sens=1.0)
    losses = []
    for _ in range(epoch):
        data = Tensor(np.ones([batch_size, 3, 224, 224]).astype(np.float32) * 0.01)
        label = Tensor(np.ones([batch_size]).astype(np.int32))
        loss, grads = train_network(data, label)
        grads = F.identity(grads)  # materialize the gradient tuple before the update
        optimizer(grads)
        losses.append(loss)
    assert losses[-1].asnumpy() < 0.8
def test_train_lenet_with_new_interface(num_classes=10, epoch=20, batch_size=32):
    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    network = LeNet5(num_classes)
    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
    net_with_criterion = WithLossCell(network, criterion)
    net_with_criterion.set_train()

    weights = ParameterTuple(network.trainable_params())
    optimizer = nn.Momentum(weights, 0.1, 0.9)

    # sens_param=True without a fixed sens value: the scaling tensor is
    # supplied as the last argument of every call instead.
    train_network = ForwardValueAndGrad(network=net_with_criterion, weights=weights,
                                        get_by_list=True, sens_param=True)
    losses = []
    for _ in range(epoch):
        data = Tensor(np.ones([batch_size, 1, 32, 32]).astype(np.float32) * 0.01)
        label = Tensor(np.ones([batch_size]).astype(np.int32))
        sens = Tensor(np.ones([1]).astype(np.float32))
        loss, grads = train_network(data, label, sens)
        grads = F.identity(grads)
        optimizer(grads)
        losses.append(loss)
    assert losses[-1].asnumpy() < 0.01
    assert losses[-1].asnumpy() > 0.001
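# For comparison with the ForwardValueAndGrad-based loops above: the same
# update can be expressed with nn.TrainOneStepCell, which fuses the forward
# pass, gradient computation, and optimizer step into a single cell. A minimal
# sketch, reusing net_with_criterion and optimizer from the LeNet test above:
train_step = nn.TrainOneStepCell(net_with_criterion, optimizer)
train_step.set_train()
loss = train_step(data, label)  # updates parameters in place and returns the loss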
def bn_common(parallel_mode, train_flag, strategy_loss=None):
    context.set_context(mode=context.GRAPH_MODE)
    context.set_auto_parallel_context(parallel_mode=parallel_mode, device_num=8)
    learning_rate = 0.1
    momentum = 0.9
    epoch_size = 2
    rank_size = 8

    predict = Tensor(np.ones([32, 512]), dtype=ms.float32)
    label = Tensor(np.ones([32]), dtype=ms.int32)
    dataset = Dataset(predict, label, 2)
    net = bn_net()

    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    loss.softmax_cross_entropy.shard(strategy_loss)
    opt = Momentum(net.trainable_params(), learning_rate, momentum, 0.0001, 1024 * rank_size)

    if not train_flag:
        net = WithLossCell(net, loss)
        net.set_train()
    if parallel_mode == ParallelMode.DATA_PARALLEL:
        context.set_auto_parallel_context(parameter_broadcast=True)

    model = Model(net, loss, opt)
    if train_flag:
        model.train(epoch_size, dataset, dataset_sink_mode=False)
    else:
        model._predict(predict, label)
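# Usage sketch for bn_common (hypothetical test drivers; the shard strategy
# tuple is illustrative and sized for the 8-device layout configured above):
def test_batchnorm_data_parallel_train():
    bn_common(ParallelMode.DATA_PARALLEL, train_flag=True)

def test_batchnorm_semi_auto_parallel_predict():
    strategy_loss = ((8, 1), (8,))  # shard logits across 8 devices; labels follow
    bn_common(ParallelMode.SEMI_AUTO_PARALLEL, train_flag=False, strategy_loss=strategy_loss)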
        # Full-network training branch: standard Model.train with fixed loss scaling.
        loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
        opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr,
                       config.momentum, config.weight_decay, config.loss_scale)
        model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale)

        cb = config_ckpoint(config, lr, step_size)
        print("============== Starting Training ==============")
        model.train(epoch_size, dataset, callbacks=cb)
        print("============== End Training ==============")
    else:
        # Fine-tune only the head: wrap it with the loss and a fused
        # forward/backward/update cell, then iterate over cached features.
        opt = Momentum(filter(lambda x: x.requires_grad, head_net.get_parameters()), lr,
                       config.momentum, config.weight_decay)
        network = WithLossCell(head_net, loss)
        network = TrainOneStepCell(network, opt)
        network.set_train()

        features_path = args_opt.dataset_path + '_features'
        idx_list = list(range(step_size))
        rank = 0
        if config.run_distribute:
            rank = get_rank()
        save_ckpt_path = os.path.join(config.save_checkpoint_path, 'ckpt_' + str(rank) + '/')
        if not os.path.isdir(save_ckpt_path):
            os.mkdir(save_ckpt_path)

        for epoch in range(epoch_size):
            random.shuffle(idx_list)
            epoch_start = time.time()
            losses = []
            for j in idx_list:
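                # Hypothetical continuation of the truncated loop (not in the
                # original excerpt): assuming an earlier extraction step dumped
                # per-index feature/label batches as .npy files, each step loads
                # one batch and runs the fused forward/backward/update cell.
                # File names are illustrative.
                feature = Tensor(np.load(os.path.join(features_path, 'feature_' + str(j) + '.npy')))
                label = Tensor(np.load(os.path.join(features_path, 'label_' + str(j) + '.npy')))
                losses.append(network(feature, label).asnumpy())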