step += len(inputs)
# Keep index 6 along dim 1 as a slice (6:7) so the dimension is preserved.
# The network output is (b, d1, d2, ..., dn, c), where c is the one-hot
# logit dimension.
inp = inputs[:, 6:7]

# Mark positions where the target differs from the input; positions that
# fall outside the valid input mask are labelled with the pad symbol.
answers = targets.ne(inp).to(torch.long)
answers[inputs_mask[:, 6:7].eq(
    False)] = TaskSpecificARCDataset.WordMap['pad_symbol']

outputs = train_forward(net, inputs, inputs_mask, task, task_mask)
loss = compute_balance_loss(
    outputs, targets, answers, TaskSpecificARCDataset.WordMap['pad_symbol'])

# Greedy decoding: argmax over the logit dimension, cast back to the
# output dtype so it can be compared against the targets.
_, output_index = torch.max(outputs, dim=-1)
output_index = output_index.to(outputs.dtype)

element_accuracy = compute_element_accuracy(
    output_index, targets, TaskSpecificARCDataset.WordMap['pad_symbol'])
mask_accuracy = compute_mask_accuracy(
    inp, output_index, targets, TaskSpecificARCDataset.WordMap['pad_symbol'])
correct_accuracy = compute_corrects_accuracy(
    output_index, targets, TaskSpecificARCDataset.WordMap['pad_symbol'])

loss.backward()
optimizer.step()
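# The metric helpers above are defined elsewhere in the repo; what follows
# is a minimal sketch of plausible implementations, assuming tensors shaped
# (b, d1, ..., dn) and a scalar pad symbol. Names and signatures match the
# call sites, but the bodies are assumptions, not the actual code.
import torch


def compute_element_accuracy(output_index, targets, pad):
    # Fraction of non-pad target elements predicted correctly.
    valid = targets.ne(pad)
    correct = output_index.eq(targets) & valid
    return correct.sum().float() / valid.sum().clamp(min=1).float()


def compute_mask_accuracy(inputs, output_index, targets, pad):
    # Accuracy restricted to positions the model had to change,
    # i.e. where the input disagrees with the (non-pad) target.
    changed = inputs.ne(targets) & targets.ne(pad)
    correct = output_index.eq(targets) & changed
    return correct.sum().float() / changed.sum().clamp(min=1).float()


def compute_corrects_accuracy(output_index, targets, pad):
    # Fraction of samples in which every non-pad element is correct.
    b = targets.shape[0]
    valid = targets.reshape(b, -1).ne(pad)
    hit = output_index.reshape(b, -1).eq(targets.reshape(b, -1)) | ~valid
    return hit.all(dim=1).float().mean()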
inputs, ctx_input, targets, ctx_targets, task = to_same_size(
    padding, *to_device(device, *batch))
start_time = time.time()
step += len(inputs)

# Mark positions where the target differs from the input; padded input
# positions are labelled with the padding value instead.
answers = targets.ne(inputs).to(torch.long)
answers[inputs.eq(padding)] = padding

# Forward pass: each tensor is paired with a float mask of its non-pad
# positions; the context input/target pairs condition the prediction.
outputs, predict_task = train_forward(
    net,
    inputs, inputs.ne(padding).to(torch.float),
    ctx_input, ctx_input.ne(padding).to(torch.float),
    ctx_targets, ctx_targets.ne(padding).to(torch.float))

# Joint objective: balanced reconstruction loss plus a task-classification
# loss on the predicted task id (task[:, 0, 0] is the per-sample label).
loss = compute_balance_loss(outputs, targets, answers, padding) \
    + compute_task_loss(predict_task, task[:, 0, 0], padding)

_, predict_task = torch.max(predict_task, dim=-1)
_, output_index = torch.max(outputs, dim=-1)
output_index = output_index.to(outputs.dtype)

task_accuracy = compute_task_accuracy(predict_task, task[:, 0, 0], padding)
element_accuracy = compute_element_accuracy(output_index, targets, padding)
mask_accuracy = compute_mask_accuracy(inputs, output_index, targets, padding)
correct_accuracy = compute_corrects_accuracy(output_index, targets, padding)

loss.backward()
optimizer.step()
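# compute_balance_loss and compute_task_loss are likewise defined elsewhere;
# this is a minimal sketch under the assumption that "balance" means giving
# changed (answers == 1) and unchanged (answers == 0) positions equal weight
# in the cross-entropy, with pad-labelled positions falling in neither group.
# optimizer.zero_grad() is assumed to be called elsewhere in the loop.
import torch
import torch.nn.functional as F


def compute_balance_loss(outputs, targets, answers, pad):
    # Per-element cross-entropy over the one-hot logit dimension, then
    # averaged separately over changed and unchanged positions.
    c = outputs.shape[-1]
    ce = F.cross_entropy(outputs.reshape(-1, c),
                         targets.reshape(-1).long(), reduction='none')
    ans = answers.reshape(-1)
    changed, unchanged = ans.eq(1), ans.eq(0)
    loss = ce.new_zeros(())
    if changed.any():
        loss = loss + 0.5 * ce[changed].mean()
    if unchanged.any():
        loss = loss + 0.5 * ce[unchanged].mean()
    return loss


def compute_task_loss(predict_task, task, pad):
    # Standard cross-entropy on the task logits, ignoring padded labels.
    return F.cross_entropy(predict_task, task.long(), ignore_index=pad)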