# NOTE(review): the comment line directly below is the tail of an `if`/`else`
# whose `if` header lies before the visible part of this file. It is kept
# verbatim (not reformatted) because the branch condition cannot be
# reconstructed from here. When it is re-joined with its header, be aware that
# `a.parameters() + b.parameters()` raises TypeError if `parameters()` returns
# generators (presumably these are torch.nn.Module instances - TODO confirm);
# `itertools.chain(...)` or concatenating `list(...)` copies would be needed.
# Define optimizer parameters optimizer_parameters = policy_net.parameters() else: # Define optimizer parameters optimizer_parameters = policy_net.parameters() + encoder.parameters() + forward_model.parameters() + inverse_model.parameters()

# Build the optimizer and the replay memory. Both branches of the original
# code constructed them with exactly the same arguments, so this is done once
# and only the load-specific steps remain conditional.
optimizer = optim.Adam(params=optimizer_parameters, lr=LEARNING_RATE)
memory = ReplayMemory(REPLAY_MEMORY_SIZE)

# When a previous run is being loaded, restore the optimizer from
# OPTIMIZER_STATE_DICT and append every chunk in REPLAY_MEMORY_CHUNKS to the
# freshly created replay memory.
if RUN_TO_LOAD is not None:
    optimizer.load_state_dict(OPTIMIZER_STATE_DICT)
    for chunk in REPLAY_MEMORY_CHUNKS:
        memory.append_storage_chunk(chunk)

# Initialize total variables
# NOTE(review): total_frames always starts at 0, while the other totals are
# seeded from module-level constants - confirm this is intended when a
# previous run is loaded.
total_frames = 0
total_episodes = FINISHED_EPISODES
total_original_rewards = TOTAL_ORIGINAL_REWARDS
total_shaped_rewards = TOTAL_SHAPED_REWARDS
total_losses = TOTAL_LOSSES
total_start_time = time.time()

# Initialize episode variables