def load_model(self, attributes_path, discriminator_path=None, encoder_path=None, actor_path=None,
               critic_path=None, value_path=None):
    self.off_policy_algorithm.load_model(actor_path=actor_path, critic_path=critic_path, value_path=value_path)

    print_heading("Loading models from paths: \n discriminator:{} \n encoder:{} \n attributes:{}".format(
        discriminator_path, encoder_path, attributes_path))

    if discriminator_path is not None:
        self.discriminator.load_state_dict(torch.load(discriminator_path))
    if encoder_path is not None:
        self.encoder.load_state_dict(torch.load(encoder_path))

    # Restore training attributes saved alongside the networks.
    with open(attributes_path, "rb") as f:
        attributes = pickle.load(f)

    self.current_iteration = attributes["current_iteration"]
    self.beta = attributes["beta"]
    self.policy_update_count = attributes["policy_update_count"]
    self.max_reward = attributes["max_reward"]

    print("loading done")
def save_model(self, env_name, attributes_path=None, all_nets_path=None, discriminator_path=None, encoder_path=None,
               actor_path=None, critic_path=None, value_path=None, info="none"):
    self.off_policy_algorithm.save_model(env_name=env_name, all_nets_path=all_nets_path, actor_path=actor_path,
                                         critic_path=critic_path, value_path=value_path, info=info)

    # A common directory for all networks overrides the individual paths.
    if all_nets_path is not None:
        discriminator_path = all_nets_path
        encoder_path = all_nets_path

    if discriminator_path is None:
        discriminator_path = f"model/{env_name}/"
    os.makedirs(discriminator_path, exist_ok=True)

    if encoder_path is None:
        encoder_path = f"model/{env_name}/"
    os.makedirs(encoder_path, exist_ok=True)

    if attributes_path is None:
        attributes_path = f"attributes/{env_name}/"
    os.makedirs(attributes_path, exist_ok=True)

    print_heading("Saving discriminator and encoder network parameters")
    torch.save(self.discriminator.state_dict(), discriminator_path + f"discriminator_{info}.pt")
    torch.save(self.encoder.state_dict(), encoder_path + f"encoder_{info}.pt")

    # Save the training attributes needed to resume from this checkpoint.
    with open(attributes_path + "attributes.pkl", "wb") as f:
        pickle.dump({
            "current_iteration": self.current_iteration,
            "beta": self.beta,
            "policy_update_count": self.policy_update_count,
            "max_reward": self.max_reward
        }, f)

    heading_decorator(bottom=True, print_req=True)
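
# Hypothetical usage sketch (not taken from the source): how the two methods above can be
# combined to checkpoint a run and resume it later. `obsvail` is assumed to be an instance
# of the surrounding class; the environment name and file names are illustrative and follow
# the defaults constructed in save_model.
def checkpoint_and_resume(obsvail, env_name="Walker2d-v2"):
    # Save all networks under a single directory, together with the training attributes.
    obsvail.save_model(env_name=env_name, all_nets_path=f"model/{env_name}/", info="latest")

    # Restore the training attributes, discriminator and encoder from that checkpoint.
    obsvail.load_model(attributes_path=f"attributes/{env_name}/attributes.pkl",
                       discriminator_path=f"model/{env_name}/discriminator_latest.pt",
                       encoder_path=f"model/{env_name}/encoder_latest.pt")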
                  beta_init=args.beta_init, learning_rate_decay=22, learning_rate_decay_training_steps=22,
                  optimizer=optimizer, discriminator_weight_decay=args.discriminator_weight_decay,
                  gp_lambda=args.gp_lambda, encoder_weight_decay=args.encoder_weight_decay,
                  information_constraint=args.information_constraint, grad_clip=args.grad_clip,
                  loss_clip=args.loss_clip, clip_val_grad=args.clip_val_grad, clip_val_loss=args.clip_val_loss,
                  batch_size=args.batch_size)

# Test expert buffer: size and wrapping
print_heading("Trajectory Length")
print(obsvail.trajectory_length)

print_heading("Expert buffer length")
print(len(obsvail.expert_buffer))

expert_trajectory = obsvail.expert_buffer.sample(batch_size=1)["trajectory"]

print_heading("Sampled expert trajectory details")
print("Trajectory Length".ljust(50), len(expert_trajectory))
print("Absorbing indicator for 1st state".ljust(50), expert_trajectory[0]["is_absorbing"])
print("Absorbing indicator for last state".ljust(50), expert_trajectory[-1]["is_absorbing"])
print("Absorbing indicator for 2nd last state".ljust(50), expert_trajectory[-2]["is_absorbing"])
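
# The absorbing-indicator checks above follow the absorbing-state wrapping convention for
# expert trajectories: ordinary states carry is_absorbing=False and the trajectory is
# terminated by an absorbing state. The helper below is only a minimal sketch of that
# convention; the function name and the zero-state placeholder are assumptions, not taken
# from the source.
import torch


def wrap_with_absorbing_state(states):
    trajectory = [{"state": state, "is_absorbing": False} for state in states]
    # Append a single absorbing terminal state (a zero tensor used as a placeholder here).
    trajectory.append({"state": torch.zeros_like(states[-1]), "is_absorbing": True})
    return trajectory


wrapped = wrap_with_absorbing_state([torch.randn(4) for _ in range(3)])
print([step["is_absorbing"] for step in wrapped])  # [False, False, False, True]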
from robo_rl.common import TrajectoryBuffer, print_heading

trajectory_buffer = TrajectoryBuffer(1000)

observations = [[i + j for j in range(2)] for i in range(5)]

trajectory1 = [observations[0], observations[1]]
trajectory2 = [observations[4], observations[2]]

trajectory_buffer.add(trajectory1)
trajectory_buffer.add(trajectory2)

print_heading("Sample trajectory")
print(trajectory_buffer.sample(batch_size=2))

print_heading("Sample at particular timestep")
print(trajectory_buffer.sample_timestep(batch_size=2, timestep=1))
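
# For reference, a minimal sketch of the buffer interface exercised above, assuming a
# fixed-capacity ring buffer of whole trajectories. This is an illustration only, not the
# robo_rl.common implementation; the class name SimpleTrajectoryBuffer is hypothetical.
import random


class SimpleTrajectoryBuffer:

    def __init__(self, capacity):
        self.capacity = capacity
        self.buffer = []
        self.position = 0

    def add(self, trajectory):
        # Grow until capacity is reached, then overwrite the oldest trajectory.
        if len(self.buffer) < self.capacity:
            self.buffer.append(None)
        self.buffer[self.position] = trajectory
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        # Sample whole trajectories uniformly without replacement.
        return random.sample(self.buffer, batch_size)

    def sample_timestep(self, batch_size, timestep):
        # Sample trajectories, then keep only the entry at the requested timestep.
        return [trajectory[timestep] for trajectory in random.sample(self.buffer, batch_size)]


simple_buffer = SimpleTrajectoryBuffer(capacity=1000)
simple_buffer.add(trajectory1)
simple_buffer.add(trajectory2)
print(simple_buffer.sample_timestep(batch_size=2, timestep=1))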
activation_function = torchfunc.elu
num_networks = 3

pfnn = LinearPFNN(layers_size=layers_size, final_layer_function=final_layer_function,
                  activation_function=activation_function, num_networks=num_networks)

input_tensor = torch.Tensor([0.291])
phase = 0.70
x = {"input": input_tensor, "phase": phase}

input_tensor = x["input"]

print_heading("Phase and corresponding indices")
phase = x["phase"]

# Enforce phase in [0, 1)
phase = phase - int(phase)
print("Phase".ljust(25), phase)

# Get indices for interval endpoints
left_index = int(phase * num_networks) % num_networks
right_index = (left_index + 1) % num_networks
left_phase = left_index / num_networks
right_phase = left_phase + (1 / num_networks)

# phase = weight * left_phase + (1 - weight) * right_phase
weight = (right_phase - phase) * num_networks

print("Num Networks".ljust(25), num_networks)
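
# Quick worked check of the interpolation above for phase = 0.70 and num_networks = 3:
#   left_index  = int(0.70 * 3) % 3 = 2,      right_index = (2 + 1) % 3 = 0
#   left_phase  = 2 / 3,                      right_phase = 2/3 + 1/3 = 1.0
#   weight      = (1.0 - 0.70) * 3 = 0.9
# so weight * left_phase + (1 - weight) * right_phase = 0.9 * (2/3) + 0.1 * 1.0 = 0.7 = phase.
assert (left_index, right_index) == (2, 0)
assert abs(weight * left_phase + (1 - weight) * right_phase - phase) < 1e-9

# The weight would then blend the outputs of the two neighbouring sub-networks, e.g.
#   output = weight * networks[left_index](input_tensor) + (1 - weight) * networks[right_index](input_tensor)
# How LinearPFNN combines its sub-networks internally is an assumption here, not taken
# from the source.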