# NOTE(review): whitespace-mangled fragment of a training step. The line begins
# mid-statement (the target of the `...False)] = pad_symbol` assignment is
# outside this view) and ends mid-call inside the `loging(...)` kwargs dict.
# Newlines restored for readability; every token is unchanged.
False)] = TaskSpecificARCDataset.WordMap['pad_symbol']
# Forward pass; metrics/loss below all exclude pad positions via the pad id.
outputs = train_forward(net, inputs, inputs_mask, task, task_mask)
# print(inputs.shape, outputs.shape, targets.shape, answers.shape)
loss = compute_balance_loss(
    outputs, targets, answers, TaskSpecificARCDataset.WordMap['pad_symbol'])
# Greedy decode: argmax over the last (vocabulary) dimension; cast back to the
# logits dtype because the accuracy helpers are fed float tensors.
_, output_index = torch.max(outputs, dim=-1)
output_index = output_index.to(outputs.dtype)
element_accuracy = compute_element_accuracy(
    output_index, targets, TaskSpecificARCDataset.WordMap['pad_symbol'])
# NOTE(review): `inp` is not defined anywhere in this view, and the parallel
# training loop in this file passes `inputs` to compute_mask_accuracy — confirm
# whether `inp` is a stale name (possible typo for `inputs`) or a deliberate
# pre-masking copy created before this chunk.
mask_accuracy = compute_mask_accuracy(
    inp, output_index, targets, TaskSpecificARCDataset.WordMap['pad_symbol'])
correct_accuracy = compute_corrects_accuracy(
    output_index, targets, TaskSpecificARCDataset.WordMap['pad_symbol'])
# Standard optimizer step: backprop, apply update, clear gradients.
loss.backward()
optimizer.step()
optimizer.zero_grad()
# Log training metrics; the `**{ ... }` kwargs dict continues past this view.
loging(writer, 'train', epoch, step, time.time() - start_time,
       dataset_size=len(dataset), batch_size=len(inputs), **{
# NOTE(review): whitespace-mangled fragment of a second training step (a
# variant with context examples and an auxiliary task-prediction head). The
# line begins mid-call — the callee that yields `outputs`/`predict_task` is
# outside this view — and ends inside the logging kwargs dict. Newlines
# restored for readability; every token is unchanged.
inputs.ne(padding).to(torch.float), ctx_input,
ctx_input.ne(padding).to(torch.float), ctx_targets,
ctx_targets.ne(padding).to(torch.float))
# Joint loss: sequence balance loss plus the auxiliary task-classification
# loss on the raw `predict_task` logits (argmax is taken only afterwards).
# task[:, 0, 0] presumably holds one task id per sample — TODO confirm.
loss = compute_balance_loss(outputs, targets, answers, padding) + compute_task_loss(
    predict_task, task[:, 0, 0], padding)
# Greedy decode for both heads; `predict_task` is rebound to its argmax here,
# so the logits are no longer available past this point.
_, predict_task = torch.max(predict_task, dim=-1)
_, output_index = torch.max(outputs, dim=-1)
output_index = output_index.to(outputs.dtype)
task_accuracy = compute_task_accuracy(predict_task, task[:, 0, 0], padding)
element_accuracy = compute_element_accuracy(output_index, targets, padding)
mask_accuracy = compute_mask_accuracy(inputs, output_index, targets, padding)
correct_accuracy = compute_corrects_accuracy(output_index, targets, padding)
# Standard optimizer step: backprop, apply update, clear gradients.
loss.backward()
optimizer.step()
optimizer.zero_grad()
# Log training metrics; the kwargs dict continues past this view.
loging(writer, 'train', epoch, step, time.time() - start_time,
       dataset_size=len(dataset), batch_size=len(inputs), **{
        'loss': loss, 'element_accuracy': element_accuracy,