# sequence prediction error is calculated in bits per sequence
error = torch.sum(torch.abs(binary_output - target))
errors.append(error.item())

# ---logging---
if iter % 100 == 0 and iter != 0:
    print('[*] Iteration: %d\tLoss: %.2f\tError in bits per sequence: %.2f' %
          (iter, np.mean(losses), np.mean(errors)))
    log_value('train_loss', np.mean(losses), iter)
    log_value('bit_error_per_sequence', np.mean(errors), iter)
    losses = []
    errors = []

# ---checkpoint---
if iter % args.checkpoint == 0 and iter != 0:
    print('[*] Creating a checkpoint:')
    torch.save(ntm.state_dict(), PATH + ".checkpoint_{}".format(iter))
    # Save an image of the target vs. the model output
    generate_target_original_plots(iter, task_params,
                                   PATH + ".checkpoint_{}".format(iter),
                                   bare_path + "/images")
    # Save all the configurations used
    with open(PATH + ".config_{}".format(iter), "w+") as f:
        f.write(str(task_params) + "\n" + str(args))

# ---saving the model---
torch.save(ntm.state_dict(), PATH)
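# A minimal sketch (not from the original script) of resuming from one of the
# checkpoints written above. `build_ntm` is a hypothetical factory standing in
# for however the model is constructed here; only `torch.load` and
# `load_state_dict` are standard PyTorch calls.
def resume_from_checkpoint(build_ntm, path, iteration):
    """Rebuild the model and load the weights saved at the given iteration."""
    ntm = build_ntm()
    ntm.load_state_dict(torch.load(path + ".checkpoint_{}".format(iteration)))
    return ntm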
'''
# -------------------------------------------------------------------------

loss = criterion(out, target)
losses.append(loss.item())
loss.backward()

# Clip gradients element-wise to the closed range [-10, 10]; the paper clips
# to the open interval (-10, 10), a slight but insignificant deviation.
nn.utils.clip_grad_value_(ntm.parameters(), 10)
optimizer.step()

binary_output = out.clone()
binary_output = binary_output.detach().apply_(lambda x: 0 if x < 0.5 else 1)

# sequence prediction error is calculated in bits per sequence
error = torch.sum(torch.abs(binary_output - target))
errors.append(error.item())

# ---logging---
if iter % 200 == 0:
    print('Iteration: %d\tLoss: %.2f\tError in bits per sequence: %.2f' %
          (iter, np.mean(losses), np.mean(errors)))
    log_value('train_loss', np.mean(losses), iter)
    log_value('bit_error_per_sequence', np.mean(errors), iter)
    losses = []
    errors = []

# ---saving the model---
torch.save(ntm.state_dict(), PATH)
# torch.save(ntm, PATH)
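# Note on the thresholding above: Tensor.apply_ only works on CPU tensors and
# calls a Python function once per element. A vectorized, device-agnostic
# equivalent (a sketch, not part of the original repo):
def binarize(out, threshold=0.5):
    """Threshold sigmoid outputs to {0, 1} without per-element Python calls."""
    return (out.detach() > threshold).float()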
# Get the outputs from memory without real inputs
zero_inputs = torch.zeros(inputs.size()[1]).unsqueeze(0)  # dummy inputs
for i in range(target.size()[0]):
    out[i] = ntm(zero_inputs)

# Compute loss, backprop, and optimize
loss = criterion(out, target)
losses.append(loss.item())
loss.backward()
nn.utils.clip_grad_value_(ntm.parameters(), 10)
optimizer.step()

# Calculate binary outputs
binary_output = out.clone()
binary_output = binary_output.detach().apply_(lambda x: 0 if x < 0.5 else 1)

# Sequence prediction error is calculated in bits per sequence
error = torch.sum(torch.abs(binary_output - target))
errors.append(error.item())

# Print stats
if step % 200 == 0:
    print('Step {} == Loss {:.3f} == Error {} bits per sequence'.format(
        step, np.mean(losses), np.mean(errors)))
    losses = []
    errors = []

# Save model
torch.save(ntm.state_dict(), args.saved_model)
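# Illustrative standalone version (not in the original repo) of the error
# metric above: e.g. for two 4-bit vectors differing in one position it
# returns 1.0.
def bits_per_sequence_error(binary_output, target):
    """Count the bits that differ between the thresholded output and target."""
    return torch.sum(torch.abs(binary_output - target)).item()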
binary_output = out.clone()
binary_output = binary_output.detach().apply_(lambda x: 0 if x < 0.5 else 1)

# sequence prediction error is calculated in bits per sequence
error = torch.sum(torch.abs(binary_output - target))
errors.append(error.item())

# ---logging---
if iter % 200 == 0:
    # Anneal tau: raise it by 1 every 400 iterations until it reaches max_tau
    if (iter % 400 == 0) and has_tau:
        if model.cell.tau < marnn_config.max_tau:
            model.cell.set_tau(model.cell.tau + 1.)
            print('=======>set tau:', model.cell.tau)
    sec = time.time() - start_time
    mins = sec // 60  # renamed from `min` to avoid shadowing the builtin
    sec = sec % 60
    print('Iteration: %d\tLoss: %.4f\tError in bits per sequence: %.4f, time elapsed: %dmin %.2fsec' %
          (iter, np.mean(losses), np.mean(errors), mins, sec))
    log_value('train_loss', np.mean(losses), iter)
    log_value('bit_error_per_sequence', np.mean(errors), iter)
    # Keep a separate copy of the best model seen so far
    if best_loss >= np.mean(losses):
        best_loss = np.mean(losses)
        best_state_dict = model.state_dict()
        torch.save(best_state_dict, PATH + '.best')
    losses = []
    errors = []

# ---saving the model---
torch.save(model.state_dict(), PATH)
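# The tau schedule above, factored into a hedged sketch of a helper
# (assumption: `cell.tau` and `cell.set_tau` behave exactly as used above).
def maybe_increase_tau(cell, iteration, max_tau, every=400):
    """Raise the cell's tau by 1 every `every` iterations, capped at max_tau."""
    if iteration % every == 0 and cell.tau < max_tau:
        cell.set_tau(cell.tau + 1.)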
losses += [loss.item()]

if e % 50 == 0:
    mean_loss = np.array(losses[-50:]).mean()
    print("Loss: ", loss.item())
    writer.add_scalar('Mean loss', mean_loss, e)

if e % 1000 == 0:
    # Log parameter histograms and visualize the NTM's memory state
    for name, param in model.named_parameters():
        writer.add_histogram(name, param.clone().cpu().data.numpy(), e)
    mem_pic, read_pic, write_pic = model.get_memory_info()
    pic1 = vutils.make_grid(y_pred, normalize=True, scale_each=True)
    pic2 = vutils.make_grid(Y, normalize=True, scale_each=True)
    pic3 = vutils.make_grid(mem_pic, normalize=True, scale_each=True)
    pic4 = vutils.make_grid(read_pic, normalize=True, scale_each=True)
    pic5 = vutils.make_grid(write_pic, normalize=True, scale_each=True)
    writer.add_image('NTM output', pic1, e)
    writer.add_image('True output', pic2, e)
    writer.add_image('Memory', pic3, e)
    writer.add_image('Read weights', pic4, e)
    writer.add_image('Write weights', pic5, e)
    torch.save(model.state_dict(), args.savemodel)
    losses = []
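# A sketch (hypothetical helper, not from the original repo) of the repeated
# make_grid/add_image pattern above; `writer` is a SummaryWriter and `vutils`
# is torchvision.utils as already used in the loop above.
def log_image_grid(writer, tag, tensor, step):
    """Normalize a tensor into an image grid and log it to TensorBoard."""
    grid = vutils.make_grid(tensor, normalize=True, scale_each=True)
    writer.add_image(tag, grid, step)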
    bit = 1
else:
    sampleIdx = sampleIdxGreen
    bit = 0

# Pick an episode index without replacement
start = random.sample(sampleIdx, 1)[0]
sampleIdx.remove(start)
print(start)

# Load the recorded episode and convert it to float tensors
imageSequence = np.load("sequences/image/imageSequence_" + str(start) + ".npy")
robotGpsSequence = np.load("sequences/robot/robotGpsSequence_" + str(start) + ".npy")
actionSequence = np.load("sequences/action/actionSequence_" + str(start) + ".npy")

imageSequence = torch.from_numpy(imageSequence).float()
robotGpsSequence = torch.from_numpy(robotGpsSequence).float()
y = torch.from_numpy(actionSequence).float()

loss = ntm.train(imageSequence, y, robotGpsSequence, learning_rate)
losses.append(loss.detach().numpy())
print(i, inEpochCtr, loss)
ctr += 1

torch.save(ntm.state_dict(), "ntm.pt")
np.save("losses/ntm", np.array(losses))
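# Sketch of a hypothetical loader (not in the original repo) bundling the three
# np.load calls above; the .npy files share the sampled episode index `start`.
def load_episode(start):
    """Load one recorded episode (image, GPS, action) as float tensors."""
    image = torch.from_numpy(np.load("sequences/image/imageSequence_" + str(start) + ".npy")).float()
    gps = torch.from_numpy(np.load("sequences/robot/robotGpsSequence_" + str(start) + ".npy")).float()
    action = torch.from_numpy(np.load("sequences/action/actionSequence_" + str(start) + ".npy")).float()
    return image, gps, action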