Example #1
def generate_static_step(net: Module, opt_factory: Callable[[Module], Optimizer]):
    data = tensor(np.zeros(data_shape), dtype=np.float32)
    label = tensor(np.zeros(label_shape), dtype=np.int32)
    opt = opt_factory(net)

    # Save state to reset parameters later.
    state = copy.deepcopy(net.state_dict())

    # Evaluate network in eager mode once.
    pred = net(data)
    loss = cross_entropy_with_softmax(pred, label)
    opt.zero_grad()
    grads = opt.backward(loss)

    # Compile the loss and gradient computation into a single static-graph function.
    f = mge.graph.compile(loss, grads)

    def step(data, label):
        opt.zero_grad()
        out = f(data=data, label=label)  # run the compiled forward/backward graph
        opt.step()                       # update parameters with the computed gradients
        loss = out[0][0]
        return loss

    # Reset parameters.
    net.load_state_dict(state)
    return step
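The function above builds and returns a `step` closure around a compiled static graph. A minimal usage sketch, assuming the surrounding script defines the globals `data_shape`/`label_shape` that the function reads, plus a network and a dataloader; the shapes and SGD hyper-parameters below are illustrative, not from the original:

from megengine.optimizer import SGD

data_shape = (64, 3, 32, 32)   # illustrative; read as globals by generate_static_step
label_shape = (64,)

step = generate_static_step(net, lambda m: SGD(m.parameters(), lr=0.01))
for data, label in dataloader:
    loss = step(data, label)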
Example #2
def train_fun(data, label):
    opt.clear_grad()                 # reset gradients from the previous step
    with gm:                         # record the forward pass for autodiff
        pred = net(data)
        loss = F.cross_entropy_with_softmax(pred, label)
        gm.backward(loss)            # compute gradients w.r.t. attached parameters
    opt.step()                       # update parameters
    return pred, loss
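Example #2 relies on a network `net`, a `GradManager` `gm`, and an optimizer `opt` created outside the snippet. A minimal setup sketch, assuming MegEngine's GradManager/optimizer API; the network class `Net`, the hyper-parameters, and the optional `jit.trace` decoration are illustrative assumptions:

import megengine.optimizer as optim
from megengine.autodiff import GradManager
from megengine.jit import trace

net = Net()                                    # any megengine.module.Module
gm = GradManager().attach(net.parameters())    # track gradients for these parameters
opt = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

@trace(symbolic=True)                          # optional: compile the step into a static graph
def train_fun(data, label):
    ...                                        # body as in Example #2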
Example #3
    def train_func(data, label):
        logits = net(data)
        loss = F.cross_entropy_with_softmax(logits, label)

        if num_gpu:
            # Scale the loss so that gradients summed across GPUs average to the mean.
            loss = loss / num_gpu

        opt.zero_grad()
        opt.backward(loss)
        return loss
Example #4
def valid_func(image, label):
    model.eval()
    logits = model(image)
    loss = F.cross_entropy_with_softmax(logits, label, label_smooth=0.1)
    acc1, acc5 = F.accuracy(logits, label, (1, 5))
    if dist.is_distributed():  # all_reduce_mean
        loss = dist.all_reduce_sum(loss) / dist.get_world_size()
        acc1 = dist.all_reduce_sum(acc1) / dist.get_world_size()
        acc5 = dist.all_reduce_sum(acc5) / dist.get_world_size()
    return loss, acc1, acc5
Example #5
def train_func(image, label):
    model.train()
    logits = model(image)
    loss = F.cross_entropy_with_softmax(logits, label, label_smooth=0.1)
    acc1, acc5 = F.accuracy(logits, label, (1, 5))
    optimizer.backward(loss)  # compute gradients
    if dist.is_distributed():  # all_reduce_mean
        loss = dist.all_reduce_sum(loss) / dist.get_world_size()
        acc1 = dist.all_reduce_sum(acc1) / dist.get_world_size()
        acc5 = dist.all_reduce_sum(acc5) / dist.get_world_size()
    return loss, acc1, acc5
Example #6
    def step(data, label):
        opt.zero_grad()
        # Feed the new batch into the pre-allocated input tensors.
        data_inp.set_value(data)
        label_inp.set_value(label)

        pred = net(data_inp)
        loss = cross_entropy_with_softmax(pred, label_inp)
        opt.backward(loss)  # compute gradients
        opt.step()          # update parameters

        return loss.numpy()[0]
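Example #6 writes each batch into pre-allocated input tensors with `set_value`, the placeholder style of the older MegEngine API. A rough sketch of how `data_inp` and `label_inp` could be created beforehand (batch size, image shape, and dtypes are assumptions):

import numpy as np
import megengine as mge

batch_size = 64                                                       # illustrative
data_inp = mge.tensor(np.zeros((batch_size, 3, 32, 32), dtype=np.float32))
label_inp = mge.tensor(np.zeros((batch_size,), dtype=np.int32))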
Example #7
def calculate_scale(image, label):
    model.eval()
    enable_observer(model)  # let quantization observers record activation statistics
    logits = model(image)
    loss = F.cross_entropy_with_softmax(logits, label, label_smooth=0.1)
    acc1, acc5 = F.accuracy(logits, label, (1, 5))
    if dist.is_distributed():  # all_reduce_mean
        loss = dist.all_reduce_sum(loss, "valid_loss") / dist.get_world_size()
        acc1 = dist.all_reduce_sum(acc1, "valid_acc1") / dist.get_world_size()
        acc5 = dist.all_reduce_sum(acc5, "valid_acc5") / dist.get_world_size()
    return loss, acc1, acc5
Example #8
    def forward(self,
                input_ids,
                token_type_ids=None,
                attention_mask=None,
                labels=None):
        _, pooled_output = self.bert(input_ids,
                                     token_type_ids,
                                     attention_mask,
                                     output_all_encoded_layers=False)
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)

        if labels is not None:
            loss = cross_entropy_with_softmax(
                logits.reshape(-1, self.num_labels), labels.reshape(-1))
            return logits, loss
        else:
            return logits, None
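A short usage note for the forward method above: when `labels` is passed, the classifier also returns the softmax cross-entropy loss; otherwise the loss slot is `None` (the tensor names below are placeholders):

logits, loss = model(input_ids, token_type_ids, attention_mask, labels)  # training: loss returned
logits, _ = model(input_ids, token_type_ids, attention_mask)             # inference: loss is None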
Example #9
def train_func(data, label, net=None, optimizer=None):
    net.train()
    pred = net(data)
    loss = F.cross_entropy_with_softmax(pred, label)
    optimizer.backward(loss)
    return pred, loss
Example #10
def train_func(data, label):
    pred = net(data)
    loss = F.cross_entropy_with_softmax(pred, label)
    opt.backward(loss)
    return loss
Example #11
def val_fun(data, label):
    pred = net(data)
    loss = F.cross_entropy_with_softmax(pred, label)
    return pred, loss
Example #12
def train(data, label):
    pred = net(data)
    opt.zero_grad()
    loss = cross_entropy_with_softmax(pred, label)
    opt.backward(loss)
    return loss
Example #13
def train_func(data, label, *, net, optimizer):
    pred = net(data)
    loss = F.cross_entropy_with_softmax(pred, label)
    optimizer.backward(loss)
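Example #13 uses the signature style of MegEngine's older `jit.trace` workflow, with tensors as positional arguments and the network/optimizer as keyword-only arguments. A sketch of how such a function is typically decorated and driven; the decorator arguments and the surrounding loop are assumptions, not part of the example:

from megengine.jit import trace

@trace(symbolic=True)
def train_func(data, label, *, net, optimizer):
    ...                                        # body as in Example #13

for data, label in dataloader:
    optimizer.zero_grad()                      # clear old gradients
    train_func(data, label, net=net, optimizer=optimizer)
    optimizer.step()                           # apply the gradients computed by backward()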
Example #14
def val_fun(data, label, net=None):
    net.eval()
    pred = net(data)
    loss = F.cross_entropy_with_softmax(pred, label)
    return pred, loss
Example #15
total_epochs = 100
loss_src = 1000000
for epoch in range(total_epochs):
    total_loss = 0
    correct = 0
    total = 0
    for step, (inputs_batched, labels_batched) in enumerate(dataloader):
        labels_batched = np.squeeze(labels_batched, -1).astype(np.int32)

        image.set_value(inputs_batched)
        label.set_value(labels_batched)

        optimizer.zero_grad()  # zero the parameter gradients
        logits = le_net(image)

        loss = F.cross_entropy_with_softmax(logits, label)

        optimizer.backward(loss)
        optimizer.step()  # update the parameters using the gradients

        total_loss += loss.numpy().item()
        predicted = F.argmax(logits, axis=1)
        # Fraction of correctly classified pixels (each sample has a 256x256 prediction map).
        correct += ((predicted == label).sum().numpy().item() / (256 * 256.))
        total += label.shape[0]

    print("epoch: {:0>3}, loss {:.4f}, acc {:.4f}".format(
        epoch, total_loss / len(dataloader), correct / total))

    epoch_loss = total_loss / len(dataloader)
    if epoch_loss < loss_src:
        loss_src = epoch_loss  # remember the best (lowest) epoch loss seen so far
        print("model saved")
Example #16
def train_func(data, label, *, opt, model):
    logits = model(data)
    loss = F.cross_entropy_with_softmax(logits, label)
    opt.backward(loss)
    return logits, loss