def use_the_model(name, omega, n_iteration, fullcover=False):

    writer = cv2.VideoWriter("output.avi", cv2.VideoWriter_fourcc(*"MJPG"), 30, (1200, 800))

    # Environment
    env = FireEnvironment(64, 64)

    # Vehicle to generate observation mask
    vehicle = Vehicle(n_time_windows=1024, grid_size=(64, 64))

    # Load the model
    dyn_autoencoder = DynamicAutoEncoder(SETTING, grid_size=(env.map_width, env.map_height),
                                         n_state=3, n_obs=3, encoding_dim=16, gru_hidden_dim=16)
    dyn_autoencoder.load_the_model(name, omega, n_iteration)

    ########################################
    ### Interacting with the Environment ###
    ########################################
    mask_obs, obs, state = env.reset()
    map_visit_mask, img_resized = vehicle.full_mask()
    state_est_grid = dyn_autoencoder.u_k

    for i in tqdm.tqdm(range(2000)):

        ### Collect Data from the Env. ###
        if fullcover:
            map_visit_mask, img_resized = vehicle.full_mask()
        else:
            map_visit_mask, img_resized = vehicle.generate_a_random_trajectory(state_est_grid)
        mask_obs, obs, state, reward = env.step(map_visit_mask)

        ### Run the Estimator ###
        state_est_grid = dyn_autoencoder.step(mask_obs, map_visit_mask)

        ### Render the Env. and the Est. ###
        img_env = env.output_image()
        img_state_est_grid = dyn_autoencoder.output_image(state_est_grid)
        render('env', img_env, 10)
        render('img_state_est_grid', img_state_est_grid, 10)

        ### Save the video ###
        img_env_uint8 = (img_env * 255).astype('uint8')
        img_state_est_grid_uint8 = (img_state_est_grid * 255).astype('uint8')
        backtorgb = cv2.cvtColor(img_state_est_grid_uint8, cv2.COLOR_GRAY2RGB)
        img = np.concatenate((img_env_uint8, backtorgb), axis=0)
        writer.write(img)

    writer.release()
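# Usage sketch (added comment, not in the original file): use_the_model() replays a
# trained estimator checkpoint loaded via dyn_autoencoder.load_the_model(name, omega,
# n_iteration). The argument values below are placeholders and must match a
# checkpoint previously written by save_the_model():
#
#   use_the_model(name='sysid_run', omega=0.0, n_iteration=999, fullcover=False)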
def demo4_LearningPathPlanning(setting):

    n_sample = 100

    # Environment
    env = FireEnvironment(64, 64)

    # Vehicle to generate observation mask
    vehicle = Vehicle(n_time_windows=512, grid_size=(64, 64), planner_type='Default')

    # Trainer and Estimator
    dyn_autoencoder = DynamicAutoEncoder(SETTING, grid_size=(env.map_width, env.map_height),
                                         n_state=3, n_obs=3, encoding_dim=16, gru_hidden_dim=16)

    ### DQN agent
    dqn_agent = DQN_Agent(state_size=16, action_size=4, replay_memory_size=1000, batch_size=64,
                          gamma=0.99, learning_rate=0.01, target_tau=0.01, update_rate=1, seed=0)

    # Train Data Buffer
    memory = SingleTrajectoryBuffer(N_MEMORY_SIZE)

    # Train Iteration Logger
    writer = SummaryWriter()

    # Video Writer
    video_writer1 = ImageStreamWriter('LearningPlanner.avi', FPS, image_size=(1200, 820))

    # Log the setting as concatenated text
    setting_text = ''
    for k, v in setting.items():
        setting_text += k
        setting_text += ':'
        setting_text += str(v)
        setting_text += '\t'
    writer.add_text('setting', setting_text)

    ########################################
    ### Interacting with the Environment ###
    ########################################
    mask_obs, obs, state = env.reset()
    state_est_grid = dyn_autoencoder.u_k

    ### Loss Monitors ###
    list_loss = []
    list_cross_entropy_loss = []
    list_entropy_loss = []
    list_rewards = []
    list_new_fire_count = []
    list_action = []

    ### Filling the Data Buffer ###
    for i in tqdm.tqdm(range(N_TRAIN_WAIT)):
        map_visit_mask, img_resized = vehicle.full_mask()
        mask_obs, obs, state, reward, info = env.step(map_visit_mask)
        memory.add(mask_obs.detach().long(), state.detach().long(), map_visit_mask.detach().long())

    for i in tqdm.tqdm(range(N_TOTAL_TIME_STEPS)):

        # determine epsilon-greedy action from current state
        h_k = dyn_autoencoder.h_k.squeeze().data.cpu().numpy()
        epsilon = 0.1
        action = dqn_agent.act(h_k, epsilon)
        list_action.append(action)

        ### Collect Data from the Env. ###
        map_visit_mask, img_resized = vehicle.plan_a_trajectory(state_est_grid, n_sample, action)
        mask_obs, obs, state, reward, info = env.step(map_visit_mask)
        memory.add(mask_obs.detach().long(), state.detach().long(), map_visit_mask.detach().long())

        ### Run the Estimator ###
        state_est_grid = dyn_autoencoder.step(mask_obs, map_visit_mask)
        h_kp1 = dyn_autoencoder.h_k.squeeze().data.cpu().numpy()

        #### Update the reinforcement learning agent ###
        dqn_agent.step(h_k, action, reward, h_kp1, done=False)
        list_rewards.append(reward)
        list_new_fire_count.append(info['new_fire_count'])

        ################################
        ### Rendering and Save Video ###
        ################################
        img_env = env.output_image()
        img_agent = dyn_autoencoder.output_image(state_est_grid)  # State Est

        #blank = np.zeros((400, 200, 3))
        img_top = img_env  #np.concatenate((blank, img_env[:,:800], blank), axis=1)
        blank = np.zeros((20, 1200, 3))
        img_top = np.concatenate((img_top, blank), axis=0)
        img_top = (img_top * 255).astype('uint8')

        img_state_est_grid_uint8 = (img_agent * 255).astype('uint8')
        backtorgb = cv2.cvtColor(img_state_est_grid_uint8, cv2.COLOR_GRAY2RGB)

        img_bayes_uint8 = np.concatenate((img_top, backtorgb), axis=0)  # <-- to be saved
        render('Dynamic Auto Encoder', img_bayes_uint8, 1)

        # Save video #
        video_writer1.write_image_frame(img_bayes_uint8)

        ### Training ###
        loss_val, loss_val_cross, loss_val_ent, O_np_val = dyn_autoencoder.update(memory, N_TRAIN_BATCH, N_TRAIN_WINDOW)
        list_loss.append(loss_val)
        list_cross_entropy_loss.append(loss_val_cross)
        list_entropy_loss.append(loss_val_ent)

        if i % N_LOGGING_PERIOD == 0:

            avg_loss = np.mean(np.array(list_loss))
            list_loss = []
            writer.add_scalar('dynautoenc/loss', avg_loss, i)

            avg_loss_cross = np.mean(np.array(list_cross_entropy_loss))
            list_cross_entropy_loss = []
            writer.add_scalar('dynautoenc/crossentropy', avg_loss_cross, i)

            avg_loss_entropy = np.mean(np.array(list_entropy_loss))
            list_entropy_loss = []
            writer.add_scalar('dynautoenc/shannonentropy', avg_loss_entropy, i)

            avg_reward = np.mean(np.array(list_rewards))
            list_rewards = []
            writer.add_scalar('perform/rewards', avg_reward, i)

            avg_new_fire_count = np.mean(np.array(list_new_fire_count))
            list_new_fire_count = []
            writer.add_scalar('perform/new_fire_counts', avg_new_fire_count, i)
            writer.add_scalar('perform/pc_coverd_new_fire', avg_reward / avg_new_fire_count, i)

            action_0_count = list_action.count(0)
            action_1_count = list_action.count(1)
            action_2_count = list_action.count(2)
            action_3_count = list_action.count(3)
            writer.add_scalar('action_count/0', action_0_count / len(list_action), i)
            writer.add_scalar('action_count/1', action_1_count / len(list_action), i)
            writer.add_scalar('action_count/2', action_2_count / len(list_action), i)
            writer.add_scalar('action_count/3', action_3_count / len(list_action), i)
            list_action = []

            writer.add_scalar('obs_state0/o00', O_np_val[0][0], i)
            writer.add_scalar('obs_state1/o01', O_np_val[0][1], i)
            writer.add_scalar('obs_state2/o02', O_np_val[0][2], i)
            writer.add_scalar('obs_state0/o10', O_np_val[1][0], i)
            writer.add_scalar('obs_state1/o11', O_np_val[1][1], i)
            writer.add_scalar('obs_state2/o12', O_np_val[1][2], i)
            writer.add_scalar('obs_state0/o20', O_np_val[2][0], i)
            writer.add_scalar('obs_state1/o21', O_np_val[2][1], i)
            writer.add_scalar('obs_state2/o22', O_np_val[2][2], i)

            print('losses at iteration: %d, losses: total %.3f, cross %.3f, shannon %.3f'
                  % (i, avg_loss, avg_loss_cross, avg_loss_entropy))
            print('memory size at iteration: %d, size: %d' % (i, len(memory.obs_memory)))

        if (i + 1) % N_SAVING_PERIOD == 0:
            f_name = setting['name']
            dyn_autoencoder.save_the_model(i, f_name)
            dqn_agent.save_the_model(i, f_name)

    video_writer1.close()
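# Usage sketch (added comment, not in the original file): demo4_LearningPathPlanning()
# reads setting['name'] for the saved model file names and logs every key/value pair
# of the setting dict to TensorBoard. The dict below is a placeholder, not the
# authors' configuration:
#
#   demo4_LearningPathPlanning({'name': 'learning_planner_run'})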
def demo3_SysID(setting):

    n_sample = 1
    action_param = 3

    # Environment
    env = FireEnvironment(64, 64)

    # Vehicle to generate observation mask
    vehicle = Vehicle(n_time_windows=512, grid_size=(64, 64), planner_type='Random')

    # Trainer and Estimator
    dyn_autoencoder = DynamicAutoEncoder(SETTING, grid_size=(env.map_width, env.map_height),
                                         n_state=3, n_obs=3, encoding_dim=16, gru_hidden_dim=16)

    # Train Data Buffer
    memory = SingleTrajectoryBuffer(N_MEMORY_SIZE)

    # Train Iteration Logger
    writer = SummaryWriter()

    # Video Writer
    video_writer1 = ImageStreamWriter('RandomPathSysId.avi', FPS, image_size=(1200, 820))

    # Log the setting as concatenated text
    setting_text = ''
    for k, v in setting.items():
        setting_text += k
        setting_text += ':'
        setting_text += str(v)
        setting_text += '\t'
    writer.add_text('setting', setting_text)

    ########################################
    ### Interacting with the Environment ###
    ########################################
    mask_obs, obs, state = env.reset()
    state_est_grid = dyn_autoencoder.u_k
    map_visit_mask, img_resized = vehicle.plan_a_trajectory(state_est_grid, n_sample, action_param)

    ### Loss Monitors ###
    list_loss = []
    list_cross_entropy_loss = []
    list_entropy_loss = []

    ### Filling the Data Buffer ###
    for i in tqdm.tqdm(range(N_TRAIN_WAIT)):
        map_visit_mask, img_resized = vehicle.full_mask()
        mask_obs, obs, state, reward, info = env.step(map_visit_mask)
        memory.add(mask_obs, state, map_visit_mask)

    for i in tqdm.tqdm(range(N_TOTAL_TIME_STEPS)):

        # hidden state of the estimator at the current time step
        h_k = dyn_autoencoder.h_k.squeeze().data.cpu().numpy()

        ### Collect Data from the Env. ###
        map_visit_mask, img_resized = vehicle.plan_a_trajectory(state_est_grid, n_sample, action_param)
        mask_obs, obs, state, reward, info = env.step(map_visit_mask)
        memory.add(mask_obs, state, map_visit_mask)

        ### Run the Estimator ###
        state_est_grid = dyn_autoencoder.step(mask_obs, map_visit_mask)
        h_kp1 = dyn_autoencoder.h_k.squeeze().data.cpu().numpy()

        ################################
        ### Rendering and Save Video ###
        ################################
        img_env = env.output_image()
        img_agent = dyn_autoencoder.output_image(state_est_grid)  # State Est

        #blank = np.zeros((400, 200, 3))
        img_top = img_env  #np.concatenate((blank, img_env[:,:800], blank), axis=1)
        blank = np.zeros((20, 1200, 3))
        img_top = np.concatenate((img_top, blank), axis=0)
        img_top = (img_top * 255).astype('uint8')

        img_state_est_grid_uint8 = (img_agent * 255).astype('uint8')
        backtorgb = cv2.cvtColor(img_state_est_grid_uint8, cv2.COLOR_GRAY2RGB)

        img_bayes_uint8 = np.concatenate((img_top, backtorgb), axis=0)  # <-- to be saved
        render('Dynamic Auto Encoder', img_bayes_uint8, 1)

        # Save video #
        video_writer1.write_image_frame(img_bayes_uint8)

        ### Training ###
        loss_val, loss_val_cross, loss_val_ent, O_np_val = dyn_autoencoder.update(memory, N_TRAIN_BATCH, N_TRAIN_WINDOW)
        list_loss.append(loss_val)
        list_cross_entropy_loss.append(loss_val_cross)
        list_entropy_loss.append(loss_val_ent)

        if i % N_LOGGING_PERIOD == 0:

            avg_loss = np.mean(np.array(list_loss))
            list_loss = []
            writer.add_scalar('dynautoenc/loss', avg_loss, i)

            avg_loss_cross = np.mean(np.array(list_cross_entropy_loss))
            list_cross_entropy_loss = []
            writer.add_scalar('dynautoenc/crossentropy', avg_loss_cross, i)

            avg_loss_entropy = np.mean(np.array(list_entropy_loss))
            list_entropy_loss = []
            writer.add_scalar('dynautoenc/shannonentropy', avg_loss_entropy, i)

            writer.add_scalar('obs_state0/o00', O_np_val[0][0], i)
            writer.add_scalar('obs_state1/o01', O_np_val[0][1], i)
            writer.add_scalar('obs_state2/o02', O_np_val[0][2], i)
            writer.add_scalar('obs_state0/o10', O_np_val[1][0], i)
            writer.add_scalar('obs_state1/o11', O_np_val[1][1], i)
            writer.add_scalar('obs_state2/o12', O_np_val[1][2], i)
            writer.add_scalar('obs_state0/o20', O_np_val[2][0], i)
            writer.add_scalar('obs_state1/o21', O_np_val[2][1], i)
            writer.add_scalar('obs_state2/o22', O_np_val[2][2], i)

            print('losses at iteration: %d, losses: total %.3f, cross %.3f, shannon %.3f'
                  % (i, avg_loss, avg_loss_cross, avg_loss_entropy))
            print('memory size at iteration: %d, size: %d' % (i, len(memory.obs_memory)))

        if (i + 1) % N_SAVING_PERIOD == 0:
            f_name = setting['name']
            dyn_autoencoder.save_the_model(i, f_name)

    video_writer1.close()
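# Usage sketch (added comment, not in the original file): demo3_SysID() performs
# system identification along randomly planned paths; it only requires
# setting['name'] (for checkpoint file names) plus any other keys to be echoed into
# the TensorBoard text log. The dict below is a placeholder:
#
#   demo3_SysID({'name': 'random_path_sysid'})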
def demo5_ComparePolicies(setting, env):

    n_sample = 2048

    # Vehicle to generate observation mask
    vehicle = Vehicle(n_time_windows=64, grid_size=(64, 64), planner_type='Default')

    # Trainer and Estimator
    dyn_autoencoder = DynamicAutoEncoder(SETTING, grid_size=(env.map_width, env.map_height),
                                         n_state=3, n_obs=3, encoding_dim=4, gru_hidden_dim=4)

    ### DQN agent
    dqn_agent = DQN_Agent(state_size=4, action_size=4, replay_memory_size=1000, batch_size=64,
                          gamma=0.99, learning_rate=0.01, target_tau=0.01, update_rate=1, seed=0)

    # Train Data Buffer
    memory = SingleTrajectoryBuffer(N_MEMORY_SIZE)

    # Video Writer
    '''
    video_f_name = 'UsePlanner' + '_' + setting['name'] + '_' + setting['policy_type'] + '.avi'
    video_writer1 = ImageStreamWriter(video_f_name, FPS, image_size=(1200, 820))
    '''

    # Train Iteration Logger
    writer = SummaryWriter()

    # Log the setting as concatenated text
    setting_text = ''
    for k, v in setting.items():
        setting_text += k
        setting_text += ':'
        setting_text += str(v)
        setting_text += '\t'
    writer.add_text('setting', setting_text)

    ########################################
    ### Interacting with the Environment ###
    ########################################

    ### Loss Monitors ###
    list_rewards = []
    list_new_fire_count = []
    list_action = []
    list_loss = []

    ### Filling the Data Buffer ###
    for i in tqdm.tqdm(range(N_TRAIN_WAIT)):
        map_visit_mask, img_resized = vehicle.full_mask()
        mask_obs, obs, state, reward, info = env.step(map_visit_mask)
        memory.add(mask_obs.detach().long(), state.detach().long(), map_visit_mask.detach().long())

    mask_obs, obs, state = env.reset()
    state_est_grid = dyn_autoencoder.u_k

    for i in tqdm.tqdm(range(N_TOTAL_TIME_STEPS)):

        # determine epsilon-greedy action from current state
        h_k = dyn_autoencoder.h_k.squeeze().data.cpu().numpy()
        epsilon = 0.1
        action = dqn_agent.act(h_k, epsilon)

        ### Collect Data from the Env. ###
        # Plan a trajectory
        policy_type = setting['policy_type']
        if policy_type == 'Default':
            map_visit_mask, img_resized = vehicle.plan_a_trajectory(state_est_grid, n_sample, action)
        elif policy_type == 'Random':
            action = 777
            map_visit_mask, img_resized = vehicle.generate_a_random_trajectory()
        elif policy_type == 'Act0':
            action = 0
            map_visit_mask, img_resized = vehicle.plan_a_trajectory(state_est_grid, n_sample, action)
        elif policy_type == 'Act1':
            action = 1
            map_visit_mask, img_resized = vehicle.plan_a_trajectory(state_est_grid, n_sample, action)
        elif policy_type == 'Act2':
            action = 2
            map_visit_mask, img_resized = vehicle.plan_a_trajectory(state_est_grid, n_sample, action)
        else:
            action = 3
            map_visit_mask, img_resized = vehicle.plan_a_trajectory(state_est_grid, n_sample, action)
        list_action.append(action)

        # Collect the masked observation
        mask_obs, obs, state, reward, info = env.step(map_visit_mask)
        memory.add(mask_obs.detach().long(), state.detach().long(), map_visit_mask.detach().long())

        ### Run the Estimator ###
        state_est_grid = dyn_autoencoder.step(mask_obs, map_visit_mask)
        h_kp1 = dyn_autoencoder.h_k.squeeze().data.cpu().numpy()

        list_rewards.append(reward)
        list_new_fire_count.append(info['new_fire_count'])

        update = True

        #### Update the reinforcement learning agent and Dyn Auto Enc ###
        if policy_type != 'Random':
            dqn_agent.step(h_k, action, reward, h_kp1, False, update)
            loss_val, loss_val_cross, loss_val_ent, O_np_val = dyn_autoencoder.update(memory, N_TRAIN_BATCH, N_TRAIN_WINDOW, update)
            list_loss.append(loss_val)

        ################################
        ### Rendering and Save Video ###
        ################################
        img_env = env.output_image()
        img_agent = dyn_autoencoder.output_image(state_est_grid)  # State Est

        #blank = np.zeros((400, 200, 3))
        img_top = img_env  #np.concatenate((blank, img_env[:,:800], blank), axis=1)
        blank = np.zeros((20, 1200, 3))
        img_top = np.concatenate((img_top, blank), axis=0)
        img_top = (img_top * 255).astype('uint8')

        img_state_est_grid_uint8 = (img_agent * 255).astype('uint8')
        backtorgb = cv2.cvtColor(img_state_est_grid_uint8, cv2.COLOR_GRAY2RGB)

        img_bayes_uint8 = np.concatenate((img_top, backtorgb), axis=0)  # <-- to be saved
        render('Dynamic Auto Encoder', img_bayes_uint8, 1)

        # Save video #
        #video_writer1.write_image_frame(img_bayes_uint8)

        if i % N_LOGGING_PERIOD == 0:

            avg_reward = np.mean(np.array(list_rewards))
            list_rewards = []
            writer.add_scalar('perform/rewards', avg_reward, i)

            avg_new_fire_count = max(np.mean(np.array(list_new_fire_count)), 1)  # to avoid division by zero
            list_new_fire_count = []
            writer.add_scalar('perform/new_fire_counts', avg_new_fire_count, i)
            writer.add_scalar('perform/pc_coverd_new_fire', avg_reward / avg_new_fire_count, i)

            if policy_type != 'Random':
                avg_loss = np.mean(np.array(list_loss))
                list_loss = []
                writer.add_scalar('dynautoenc/loss', avg_loss, i)

                action_0_count = list_action.count(0)
                action_1_count = list_action.count(1)
                action_2_count = list_action.count(2)
                action_3_count = list_action.count(3)
                writer.add_scalar('action_count/0', action_0_count / len(list_action), i)
                writer.add_scalar('action_count/1', action_1_count / len(list_action), i)
                writer.add_scalar('action_count/2', action_2_count / len(list_action), i)
                writer.add_scalar('action_count/3', action_3_count / len(list_action), i)
                list_action = []

                writer.add_scalar('obs_state0/o00', O_np_val[0][0], i)
                writer.add_scalar('obs_state1/o01', O_np_val[0][1], i)
                writer.add_scalar('obs_state2/o02', O_np_val[0][2], i)
                writer.add_scalar('obs_state0/o10', O_np_val[1][0], i)
                writer.add_scalar('obs_state1/o11', O_np_val[1][1], i)
                writer.add_scalar('obs_state2/o12', O_np_val[1][2], i)
                writer.add_scalar('obs_state0/o20', O_np_val[2][0], i)
                writer.add_scalar('obs_state1/o21', O_np_val[2][1], i)
                writer.add_scalar('obs_state2/o22', O_np_val[2][2], i)
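# Usage sketch (added comment, not in the original file): demo5_ComparePolicies()
# takes an already constructed FireEnvironment, so several policies can be run
# against the same environment instance; setting['policy_type'] selects among
# 'Default', 'Random', and 'Act0'..'Act3'. Values below are placeholders:
#
#   env = FireEnvironment(64, 64)
#   for policy in ['Default', 'Random', 'Act0']:
#       demo5_ComparePolicies({'name': 'compare_run', 'policy_type': policy}, env)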
def train(fullcover, name, setting):

    n_sample = 20

    # Environment
    env = FireEnvironment(64, 64)

    # Vehicle to generate observation mask
    vehicle = Vehicle(n_time_windows=1000, grid_size=(64, 64), planner_type=setting['planner_type'])

    # Trainer and Estimator
    dyn_autoencoder = DynamicAutoEncoder(SETTING, grid_size=(env.map_width, env.map_height),
                                         n_state=3, n_obs=3, encoding_dim=16, gru_hidden_dim=16)

    # Train Data Buffer
    memory = SingleTrajectoryBuffer(N_MEMORY_SIZE)

    ### DQN agent
    dqn_agent = DQN_Agent(state_size=16, action_size=4, replay_memory_size=1000, batch_size=64,
                          gamma=0.99, learning_rate=0.01, target_tau=0.01, update_rate=1, seed=0)

    # Train Iteration Logger
    from torch.utils.tensorboard import SummaryWriter
    writer = SummaryWriter()

    # Log the setting as concatenated text
    setting_text = ''
    for k, v in setting.items():
        setting_text += k
        setting_text += str(v)
        setting_text += '\t'
    writer.add_text('setting', setting_text)

    ########################################
    ### Interacting with the Environment ###
    ########################################
    mask_obs, obs, state = env.reset()
    map_visit_mask, img_resized = vehicle.full_mask()
    state_est_grid = dyn_autoencoder.u_k

    ### Loss Monitors ###
    list_loss = []
    list_cross_entropy_loss = []
    list_entropy_loss = []
    list_rewards = []
    list_count_fire_visit = []
    list_count_all_fire = []
    list_action = []

    ### Filling the Data Buffer ###
    # Warm up the buffer with full-coverage observations before training starts.
    for i in tqdm.tqdm(range(N_TRAIN_WAIT)):
        map_visit_mask, img_resized = vehicle.full_mask()
        mask_obs, obs, state, reward = env.step(map_visit_mask)
        memory.add(mask_obs, state, map_visit_mask)

    for i in tqdm.tqdm(range(N_TOTAL_TIME_STEPS)):

        # determine epsilon-greedy action from current state
        h_k = dyn_autoencoder.h_k.squeeze().data.cpu().numpy()
        epsilon = 0.1
        action = dqn_agent.act(h_k, epsilon)
        list_action.append(action)

        ### Collect Data from the Env. ###
        if fullcover:
            map_visit_mask, img_resized = vehicle.full_mask()
        else:
            map_visit_mask, img_resized = vehicle.plan_a_trajectory(state_est_grid, n_sample, action)
        mask_obs, obs, state, reward = env.step(map_visit_mask)
        memory.add(mask_obs, state, map_visit_mask)

        ### Run the Estimator ###
        state_est_grid = dyn_autoencoder.step(mask_obs, map_visit_mask)
        h_kp1 = dyn_autoencoder.h_k.squeeze().data.cpu().numpy()

        #### Update the reinforcement learning agent ###
        dqn_agent.step(h_k, action, reward, h_kp1, done=False)
        list_rewards.append(reward)

        fire_count = (torch.sum(state[2])).item()
        fire_visit = (torch.sum(mask_obs.permute(2, 0, 1) * state[2].unsqueeze(0))).item()

        if fire_count < 1:
            print('no fire')
        else:
            list_count_fire_visit.append(fire_visit)
            list_count_all_fire.append(fire_count)

        ### Render the Env. and the Est. ###
        if i % N_RENDER_PERIOD == 0:
            img_env = env.output_image()
            img_state_est_grid = dyn_autoencoder.output_image(state_est_grid)
            render('env', img_env, 1)
            render('img_state_est_grid', img_state_est_grid, 1)

        ### Training ###
        loss_val, loss_val_cross, loss_val_ent, O_np_val = dyn_autoencoder.update(memory, N_TRAIN_BATCH, N_TRAIN_WINDOW)
        list_loss.append(loss_val)
        list_cross_entropy_loss.append(loss_val_cross)
        list_entropy_loss.append(loss_val_ent)

        if i % N_LOGGING_PERIOD == 0:

            avg_loss = np.mean(np.array(list_loss))
            list_loss = []
            writer.add_scalar('dynautoenc/loss', avg_loss, i)

            avg_loss_cross = np.mean(np.array(list_cross_entropy_loss))
            list_cross_entropy_loss = []
            writer.add_scalar('dynautoenc/crossentropy', avg_loss_cross, i)

            avg_loss_entropy = np.mean(np.array(list_entropy_loss))
            list_entropy_loss = []
            writer.add_scalar('dynautoenc/shannonentropy', avg_loss_entropy, i)

            avg_reward = np.mean(np.array(list_rewards))
            list_rewards = []
            writer.add_scalar('perform/rewards', avg_reward, i)

            avg_count_fire_visit = np.mean(np.array(list_count_fire_visit))
            list_count_fire_visit = []
            writer.add_scalar('perform/avg_count_fire_visit', avg_count_fire_visit, i)

            avg_count_all_fire = np.mean(np.array(list_count_all_fire))
            list_count_all_fire = []
            writer.add_scalar('perform/avg_count_all_fire', avg_count_all_fire, i)

            action_0_count = list_action.count(0)
            action_1_count = list_action.count(1)
            action_2_count = list_action.count(2)
            action_3_count = list_action.count(3)
            list_action = []

            if setting['planner_type'] == 'Default':
                writer.add_scalar('action_count/0', action_0_count, i)
                writer.add_scalar('action_count/1', action_1_count, i)
                writer.add_scalar('action_count/2', action_2_count, i)
                writer.add_scalar('action_count/3', action_3_count, i)

            writer.add_scalar('obs_state0/o00', O_np_val[0][0], i)
            writer.add_scalar('obs_state1/o01', O_np_val[0][1], i)
            writer.add_scalar('obs_state2/o02', O_np_val[0][2], i)
            writer.add_scalar('obs_state0/o10', O_np_val[1][0], i)
            writer.add_scalar('obs_state1/o11', O_np_val[1][1], i)
            writer.add_scalar('obs_state2/o12', O_np_val[1][2], i)
            writer.add_scalar('obs_state0/o20', O_np_val[2][0], i)
            writer.add_scalar('obs_state1/o21', O_np_val[2][1], i)
            writer.add_scalar('obs_state2/o22', O_np_val[2][2], i)

            print('losses at iteration: %d, losses: total %.3f, cross %.3f, shannon %.3f'
                  % (i, avg_loss, avg_loss_cross, avg_loss_entropy))
            print('memory size at iteration: %d, size: %d' % (i, len(memory.obs_memory)))

        if (i + 1) % N_SAVING_PERIOD == 0:
            f_name = name
            dyn_autoencoder.save_the_model(i, f_name)
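# Usage sketch (added comment, not in the original file): train() reads
# setting['planner_type'] to configure the Vehicle and uses `name` for the saved
# model files; fullcover=True replaces planned trajectories with full-coverage
# observation masks. Values below are placeholders:
#
#   train(fullcover=False, name='train_default', setting={'planner_type': 'Default'})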