def generate_static_step(net: Module, opt_factory: Callable[[Module], Optimizer]):
    data = tensor(np.zeros(data_shape), dtype=np.float32)
    label = tensor(np.zeros(label_shape), dtype=np.int32)
    opt = opt_factory(net)
    # Save state to reset parameters later.
    state = copy.deepcopy(net.state_dict())
    # Evaluate network in eager mode once.
    pred = net(data)
    loss = cross_entropy_with_softmax(pred, label)
    opt.zero_grad()
    grads = opt.backward(loss)
    f = mge.graph.compile(loss, grads)

    def step(data, label):
        opt.zero_grad()
        out = f(data=data, label=label)
        opt.step()
        loss = out[0][0]
        return loss

    # Reset parameters.
    net.load_state_dict(state)
    return step
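# A minimal usage sketch for generate_static_step above, assuming the legacy
# MegEngine 0.x graph API it targets. `data_shape`, `label_shape`, `net`, and
# `dataloader` are hypothetical stand-ins, not names defined by the snippet.
import numpy as np
import megengine as mge
from megengine.optimizer import SGD

data_shape = (64, 1, 28, 28)   # assumed: a batch of MNIST-sized images
label_shape = (64,)

step = generate_static_step(net, lambda m: SGD(m.parameters(), lr=0.01))
for data, label in dataloader:
    print("loss:", step(data, label))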
def train_fun(data, label):
    opt.clear_grad()
    with gm:
        pred = net(data)
        loss = F.cross_entropy_with_softmax(pred, label)
        gm.backward(loss)
    opt.step()
    return pred, loss
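# Sketch of the surrounding state train_fun above closes over, assuming the
# MegEngine >= 1.0 GradManager API. The concrete optimizer and learning rate
# here are illustrative assumptions, not part of the original snippet.
import megengine.autodiff as autodiff
import megengine.optimizer as optim

gm = autodiff.GradManager().attach(net.parameters())
opt = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)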
def train_func(data, label):
    logits = net(data)
    loss = F.cross_entropy_with_softmax(logits, label)
    if num_gpu:
        # Scale the loss so that summing gradients across GPUs yields the mean.
        loss = loss / num_gpu
    opt.zero_grad()
    opt.backward(loss)
    return loss
def valid_func(image, label):
    model.eval()
    logits = model(image)
    loss = F.cross_entropy_with_softmax(logits, label, label_smooth=0.1)
    acc1, acc5 = F.accuracy(logits, label, (1, 5))
    if dist.is_distributed():  # all_reduce_mean
        loss = dist.all_reduce_sum(loss) / dist.get_world_size()
        acc1 = dist.all_reduce_sum(acc1) / dist.get_world_size()
        acc5 = dist.all_reduce_sum(acc5) / dist.get_world_size()
    return loss, acc1, acc5
def train_func(image, label):
    model.train()
    logits = model(image)
    loss = F.cross_entropy_with_softmax(logits, label, label_smooth=0.1)
    acc1, acc5 = F.accuracy(logits, label, (1, 5))
    optimizer.backward(loss)  # compute gradients
    if dist.is_distributed():  # all_reduce_mean
        loss = dist.all_reduce_sum(loss) / dist.get_world_size()
        acc1 = dist.all_reduce_sum(acc1) / dist.get_world_size()
        acc5 = dist.all_reduce_sum(acc5) / dist.get_world_size()
    return loss, acc1, acc5
def step(data, label):
    opt.zero_grad()
    data_inp.set_value(data)
    label_inp.set_value(label)
    pred = net(data_inp)
    loss = cross_entropy_with_softmax(pred, label_inp)
    opt.backward(loss)
    opt.step()
    return loss.numpy()[0]
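# The step closure above refills pre-allocated input tensors via set_value,
# a MegEngine 0.x idiom. A sketch of that setup; the shapes and dtypes are
# assumptions for illustration:
import numpy as np
import megengine as mge

data_inp = mge.tensor(np.zeros((64, 1, 28, 28), dtype=np.float32))
label_inp = mge.tensor(np.zeros((64,), dtype=np.int32))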
def calculate_scale(image, label):
    model.eval()
    enable_observer(model)
    logits = model(image)
    loss = F.cross_entropy_with_softmax(logits, label, label_smooth=0.1)
    acc1, acc5 = F.accuracy(logits, label, (1, 5))
    if dist.is_distributed():  # all_reduce_mean
        loss = dist.all_reduce_sum(loss, "valid_loss") / dist.get_world_size()
        acc1 = dist.all_reduce_sum(acc1, "valid_acc1") / dist.get_world_size()
        acc5 = dist.all_reduce_sum(acc5, "valid_acc5") / dist.get_world_size()
    return loss, acc1, acc5
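# valid_func, train_func, and calculate_scale above all repeat the same
# "all_reduce_mean" idiom. A small helper (hypothetical name) factors it out,
# assuming megengine.distributed is imported as dist:
import megengine.distributed as dist

def all_reduce_mean(x):
    # Sum the value across all workers, then divide by the world size.
    return dist.all_reduce_sum(x) / dist.get_world_size()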
def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None):
    _, pooled_output = self.bert(
        input_ids, token_type_ids, attention_mask, output_all_encoded_layers=False
    )
    pooled_output = self.dropout(pooled_output)
    logits = self.classifier(pooled_output)
    if labels is not None:
        loss = cross_entropy_with_softmax(
            logits.reshape(-1, self.num_labels), labels.reshape(-1)
        )
        return logits, loss
    else:
        return logits, None
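# Hedged usage sketch for the classifier forward above: with labels omitted,
# the second return value is None. The model instance, batch shape, and int32
# dtype are illustrative assumptions.
import numpy as np
import megengine as mge

input_ids = mge.tensor(np.zeros((8, 128), dtype=np.int32))
logits, loss = model(input_ids)   # loss is None without labels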
def train_func(data, label, net=None, optimizer=None):
    net.train()
    pred = net(data)
    loss = F.cross_entropy_with_softmax(pred, label)
    optimizer.backward(loss)
    return pred, loss
def train_func(data, label):
    pred = net(data)
    loss = F.cross_entropy_with_softmax(pred, label)
    opt.backward(loss)
    return loss
def val_fun(data, label):
    pred = net(data)
    loss = F.cross_entropy_with_softmax(pred, label)
    return pred, loss
def train(data, label):
    pred = net(data)
    opt.zero_grad()
    loss = cross_entropy_with_softmax(pred, label)
    opt.backward(loss)
    return loss
def train_func(data, label, *, net, optimizer):
    pred = net(data)
    loss = F.cross_entropy_with_softmax(pred, label)
    optimizer.backward(loss)
def val_fun(data, label, net=None):
    net.eval()
    pred = net(data)
    loss = F.cross_entropy_with_softmax(pred, label)
    return pred, loss
total_epochs = 100
loss_src = 1000000
for epoch in range(total_epochs):
    total_loss = 0
    correct = 0
    total = 0
    for step, (inputs_batched, labels_batched) in enumerate(dataloader):
        labels_batched = np.squeeze(labels_batched, -1).astype(np.int32)
        image.set_value(inputs_batched)
        label.set_value(labels_batched)
        optimizer.zero_grad()  # Zero the parameter gradients.
        logits = le_net(image)
        loss = F.cross_entropy_with_softmax(logits, label)
        optimizer.backward(loss)
        optimizer.step()  # Update the parameters according to the gradients.
        total_loss += loss.numpy().item()
        predicted = F.argmax(logits, axis=1)
        # Per-sample accuracy, normalized by the number of pixels (256 * 256).
        correct += (predicted == label).sum().numpy().item() / (256 * 256.0)
        total += label.shape[0]
    epoch_loss = total_loss / len(dataloader)
    print("epoch: {:0>3}, loss {:.4f}, acc {:.4f}".format(
        epoch, epoch_loss, correct / total))
    if epoch_loss < loss_src:
        loss_src = epoch_loss  # Keep the best epoch loss so far.
        print("model saved")
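# The loop above prints "model saved" without actually writing a checkpoint.
# A minimal sketch of the missing save, assuming megengine's save helper and
# a hypothetical file name:
import megengine as mge

mge.save(le_net.state_dict(), "lenet_best.mge")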
def train_func(data, label, *, opt, model):
    logits = model(data)
    loss = F.cross_entropy_with_softmax(logits, label)
    opt.backward(loss)
    return logits, loss
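# Functions like train_func above, with keyword-only `opt`/`model` arguments,
# were typically wrapped in megengine.jit.trace and called with the module and
# optimizer passed explicitly each step. A sketch, assuming the
# trace(function, symbolic=True) form and hypothetical dataloader names:
from megengine.jit import trace

traced_step = trace(train_func, symbolic=True)
for data, label in dataloader:
    opt.zero_grad()
    logits, loss = traced_step(data, label, opt=opt, model=model)
    opt.step()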