def test_cross_entropy_with_softmax():
    """Verify F.cross_entropy_with_softmax matches softmax followed by cross_entropy.

    Builds two small logits/label pairs, computes the reference loss by applying
    F.softmax explicitly and then F.cross_entropy, and feeds both cases through
    opr_test against the fused F.cross_entropy_with_softmax operator.
    """

    def make_case(logits, target):
        # Reference value: explicit softmax, then plain cross entropy.
        expected = F.cross_entropy(F.softmax(tensor(logits)), tensor(target)).numpy()
        return {"input": [logits, target], "output": expected}

    logits_a = np.array([1, 0.5], dtype=np.float32).reshape((1, 2))
    target_a = np.array([1], dtype=np.int32).reshape((1,))

    logits_b = np.array([0.3, 0.4, 0.3], dtype=np.float32).reshape((1, 3))
    target_b = np.array([1], dtype=np.int32).reshape((1,))

    cases = [make_case(logits_a, target_a), make_case(logits_b, target_b)]
    opr_test(cases, F.cross_entropy_with_softmax)
def test_release_memory():
    """Smoke-test that device memory can be cleared before compiling a train step.

    Builds an MNIST training graph (forward, loss, backward, SGD updates), calls
    clear_device_memory() on the default graph, then compiles and runs three
    epochs of full-batch training to confirm the graph still executes.
    """
    train_images, train_labels = load_mnist_datasets()["train"]

    batch_size = 15000
    data = nn.Input("data", shape=(batch_size, 1, 28, 28), dtype=np.float32)
    label = nn.Input(
        "label",
        shape=(batch_size,),
        dtype=np.int32,
        value=np.zeros((batch_size,)),
    )

    net = MnistNet()
    opt = SGD(net.parameters(), lr=0.01)

    pred = F.softmax(net(data))
    loss = F.cross_entropy(pred, label)
    opt.zero_grad()
    opt.backward(loss)
    add_updates = opt.step()

    # Release cached device memory before compiling the training function.
    mge.graph._default_graph.get_default().clear_device_memory()
    f = mge.graph.compile(loss, add_updates)

    for _ in range(3):
        train_loss = 0.0
        for start in range(0, train_images.shape[0], batch_size):
            opt.zero_grad()
            batch_images = train_images[start:start + batch_size, :, :, :]
            batch_labels = train_labels[start:start + batch_size]
            step_loss = f(data=batch_images, label=batch_labels)[0]
            train_loss += step_loss[0]
lr=setting.learning_rate, # 学习速率 ) total_epochs = 10 for epoch in range(total_epochs): total_loss = 0 batch_generator = dataset.batch_generator() accs = 0 step_count = 0 for step, (batch_data, batch_label) in enumerate(batch_generator): data.set_value(batch_data) label.set_value(batch_label) optimizer.zero_grad() # 将参数的梯度置零 prob = model(data) loss = F.cross_entropy(prob, label) # 交叉熵损失函数 total_loss += loss.numpy().item() optimizer.backward(loss) # 反传计算梯度 optimizer.step() # 根据梯度更新参数值 acc = accuracy(prob.numpy(), batch_label) accs += acc step_count += 1 #print("step: {}, loss: {}, acc: {}".format(step, loss, ) #print(step, loss) print("epoch: {}, average loss {}, dataset len: {}, acc: {}".format( epoch, total_loss / len(dataset), len(dataset), accs / step_count)) path = '/tmp/save.mge' mge.save(model.state_dict(), path)