Example #1
class MainSimulation(InitialConfig, Logger):
    def __init__(self):

        InitialConfig.__init__(self)
        Logger.__init__(self, self.logger_properties)

        self.simtime = SimTime(self.time_properties)
        self.spacecraft = Spacecraft(self.spacecraft_properties, self.components_properties, self.simtime)

        self.environment = Environment(self.environment_properties)
        self.disturbance = Disturbances(self.disturbance_properties, self.environment, self.spacecraft)

        # Auxiliary variables
        date = datetime.datetime.now()
        self.filename = date.strftime('%Y-%m-%d %H-%M-%S')

    def run_simulation(self):
        self.spacecraft.dynamics.orbit.set_propagator()
        # Loop
        self.simtime.reset_countTime()
        print('Simulation running...')
        while self.simtime.maincountTime <= self.simtime.endsimTime:
            # spacecraft update
            self.spacecraft.update()

            # current Environment and disturbances
            self.environment.update(self.simtime.current_decyaer, self.spacecraft.dynamics)
            self.disturbance.update()

            # Add the force and torque generated by the disturbance for the next dynamics propagation
            self.spacecraft.dynamics.attitude.add_ext_torque_b(self.disturbance.get_dist_torque())
            self.spacecraft.dynamics.add_ext_force_b(self.disturbance.get_dis_force())

            # Add the force and torque generated by the satellite for the next dynamics propagation
            self.spacecraft.dynamics.attitude.add_int_torque_b(self.spacecraft.generate_torque_b())

            if self.simtime.log_flag:
                self.spacecraft.update_data()
                self.simtime.progressionsimTime()
                self.simtime.log_flag = False

            # update time
            self.simtime.updateSimtime()

        # Data report to create dictionary
        self.spacecraft.create_report()
        #self.ephemeris.earth.create_report()

        # Save Dataframe pandas in csv file
        self.save_data()
        print('Finished')

    def save_data(self):
        master_data = self.spacecraft.master_data_satellite
        database = pd.DataFrame(master_data, columns=master_data.keys())
        database.to_csv("./Data/logs/"+self.filename+".csv", index=False, header=True)
        print("Data created")
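
The driver above follows a common fixed-step pattern: propagate the model, feed the resulting forces and torques back in for the next step, log only when a decimation flag is set, and dump the collected columns to CSV at the end. Below is a minimal, self-contained sketch of that loop pattern with toy one-dimensional dynamics and hypothetical names; it is not the actual SimTime/Spacecraft API.

import datetime
import pandas as pd

def run_toy_simulation(end_time=10.0, dt=0.01, log_every=10):
    """Fixed-step loop with decimated logging, mirroring the shape of run_simulation()."""
    t, x, v = 0.0, 0.0, 1.0            # time, position, velocity (toy state)
    log = {"time": [], "x": [], "v": []}
    step = 0
    while t <= end_time:
        # propagate the toy dynamics (damped motion stands in for spacecraft.update())
        v += (-0.1 * v) * dt
        x += v * dt
        # log only on decimated steps, as simtime.log_flag does
        if step % log_every == 0:
            log["time"].append(t)
            log["x"].append(x)
            log["v"].append(v)
        t += dt
        step += 1
    # save the collected columns, as save_data() does
    filename = datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')
    pd.DataFrame(log).to_csv(filename + ".csv", index=False, header=True)

run_toy_simulation()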
Example #2
    def __init__(self):
        InitialConfig.__init__(self)
        Logger.__init__(self, self.logger_properties)

        self.simtime = SimTime(self.time_properties)
        self.spacecraft = Spacecraft(self.spacecraft_properties, self.components_properties, self.simtime)

        self.environment = Environment(self.environment_properties)
        self.disturbance = Disturbances(self.disturbance_properties, self.environment, self.spacecraft)

        # Auxiliary variables
        date = datetime.datetime.now()
        self.filename = date.strftime('%Y-%m-%d %H-%M-%S')
Example #3
    def __init__(self, show=True):

        Environment.__init__(self, CONFIG_FILE)
        self.max_velocity = None
        self.access = None
        self.damping = None
        self.damping_rate = None
        self.bad_radius = None
        self.wall_size_x = None
        self.wall_size_y = None
        self.actions = None
        self.x_actions_len = None
        self.num_actions = None
        self.drone_radius = None
        self.angle_goal = None
        self.angle_obstacle = None
        self.prev_ppx = None
        self.prev_ppy = None
        self.ppx = None
        self.ppy = None
        self.pvx = None
        self.pvy = None
        self.tx = None
        self.ty = None
        self.tx2 = None
        self.ty2 = None
        self.t = None
        self.update_rate = None
        self.goal_update_time = None
        self.bad_speed = None
        self.pixel_x = None
        self.meter_x = None
        self.pixel_y = None
        self.meter_y = None
        self.x_start = None
        self.x_end = None
        self.y_start = None
        self.y_end = None
        self.max_goal_distance = None
        self.load_cfg(CONFIG_FILE)
        self.max_distance = np.sqrt(self.wall_size_x**2 + self.wall_size_y**2)
        self.damping_per_step = self.update_rate / self.damping_rate
        self.reset()
        self.num_states = len(self.get_state())
        self.access_per_step = self.access * 1.0 / self.update_rate
        self.update_goal_step = self.update_rate * self.goal_update_time
        self.bad_distance_step = self.bad_speed * 1.0 / self.update_rate
        self.show = show
        if show:
            self.frontend = PuckFrontend(self)
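
Most of the derived quantities in this constructor convert per-second configuration values into per-tick values using update_rate. A short sketch of those conversions, with made-up numbers standing in for whatever load_cfg() reads from CONFIG_FILE:

import numpy as np

# assumed values in place of the real CONFIG_FILE contents
update_rate = 30.0        # simulation ticks per second
access = 2.0              # acceleration in m/s^2 (called "access" in the snippet)
damping_rate = 60.0
bad_speed = 1.5           # obstacle speed in m/s
goal_update_time = 5.0    # seconds between goal updates
wall_size_x, wall_size_y = 4.0, 3.0

# per-tick quantities, mirroring the lines that follow load_cfg()
max_distance = np.sqrt(wall_size_x**2 + wall_size_y**2)   # arena diagonal
damping_per_step = update_rate / damping_rate
access_per_step = access * 1.0 / update_rate              # velocity change per tick
update_goal_step = update_rate * goal_update_time         # ticks between goal updates
bad_distance_step = bad_speed * 1.0 / update_rate         # obstacle movement per tick

print(access_per_step, update_goal_step, bad_distance_step)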
Example #4
    def learn(self, env: Environment, trnOpts: TrainOpts):
        device = "cpu"
        if self.opts.use_gpu and torch.cuda.is_available():
            device = "cuda:0"

        # Load checkpoint
        all_rewards, avg_rewards = self.load_checkpoint(trnOpts.checkpoint)
        self.multihead_net.to(device)
        n_iter = 0
        e = 0
        max_episodes = trnOpts.n_episodes
        max_steps = trnOpts.n_iterations
        while e < max_episodes:  # Looping episodes
            if max_steps > 0 and n_iter > max_steps:
                break
            curr_state = env.reset()
            curr_state = torch.from_numpy(curr_state).to(
                device).float().unsqueeze(0)
            episode_rewards = []
            step = 0
            episode = Episode()  # Each episode starts with a fresh Episode container
            hidden_state = None
            while True:
                n_iter += 1
                step += 1
                # Collect experience
                # e < self.opts.n_episodes_exploring => This can be added too

                with torch.no_grad():

                    action, hidden_state = self.act(curr_state, hidden_state,
                                                    device)
                    next_state, reward, done, _ = env.step(action)
                    if self.opts.render:
                        env.render()
                    episode_rewards.append(reward)

                next_state = torch.from_numpy(next_state).to(
                    device).float().unsqueeze(0)
                episode.add_transition(
                    curr_state.squeeze(0).detach().cpu().data, action, reward,
                    next_state.squeeze(0).detach().cpu().data, done)
                if done:
                    if len(episode.states) > self.opts.sequence_length:
                        self.exp_buffer.add_episode(episode)

                    if not self.exp_buffer.is_accumulated(
                            self.opts.exp_batch_size):
                        print(
                            "Accumulating buffer iteration: {}".format(n_iter))

                    else:
                        episode_end_reward = np.array(episode_rewards).sum()
                        all_rewards.append(episode_end_reward)
                        e += 1  # Update episode
                        avg_reward = np.mean(all_rewards[-100:])
                        avg_rewards.append(avg_reward)
                        print(
                            "({}/{}) - End of episode with total reward: {} - Avg Reward: {} Total Iter: {}"
                            .format(e, max_episodes, episode_end_reward,
                                    avg_reward, step))
                    break

                curr_state = next_state

                # Learn if enough data is accumulated
                if self.exp_buffer.is_accumulated(self.opts.exp_batch_size):
                    # start = time.time()
                    self.update_params(n_iter, device)
                    # end = time.time()
                    # print("Elapsed :{}".format(end-start))

                if n_iter > 0 and self.opts.save_frequency > 0 and n_iter % self.opts.save_frequency == 0:
                    print("Saving at iteration {}".format(n_iter))
                    path = os.path.join(trnOpts.save_path,
                                        time.strftime("%Y%m%d-%H%M%S"))

                    self.save_model(path)
                    self.save_rewards(path, all_rewards, avg_rewards)

        return all_rewards, avg_rewards
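
This training loop stores whole episodes rather than single transitions, and only keeps an episode if it is longer than opts.sequence_length, so that fixed-length sequences can later be sampled for the recurrent multihead network. The Episode and experience-buffer classes are not shown in the snippet; the following is a simplified, hypothetical sketch of what such containers could look like.

import random
from collections import namedtuple

Transition = namedtuple("Transition", "state action reward next_state done")

class Episode:
    """Ordered transitions of a single episode."""
    def __init__(self):
        self.states = []
        self.transitions = []

    def add_transition(self, state, action, reward, next_state, done):
        self.states.append(state)
        self.transitions.append(Transition(state, action, reward, next_state, done))

class EpisodeBuffer:
    """Keeps whole episodes and samples fixed-length sub-sequences from them."""
    def __init__(self, capacity=1000):
        self.episodes = []
        self.capacity = capacity

    def add_episode(self, episode):
        self.episodes.append(episode)
        if len(self.episodes) > self.capacity:
            self.episodes.pop(0)              # drop the oldest episode

    def is_accumulated(self, batch_size):
        return len(self.episodes) >= batch_size

    def sample_sequences(self, batch_size, seq_len):
        # assumes every stored episode is longer than seq_len,
        # which the learn() loop above guarantees before add_episode()
        batch = []
        for ep in random.sample(self.episodes, batch_size):
            start = random.randint(0, len(ep.transitions) - seq_len)
            batch.append(ep.transitions[start:start + seq_len])
        return batch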
Example #5
    def __init__(self,
                 nn_config,
                 simulation_cfg,
                 show=True,
                 training_goal=False):
        assert isinstance(simulation_cfg, SimulationConfig)
        # assert isinstance(nn_config, NNConfigMovement)
        Environment.__init__(self, CONFIG_FILE)
        self.training_goal = training_goal
        self.max_velocity = None
        self.acceleration = None
        self.damping_per_sec = None
        self.brake_per_sec = None
        self.critical_radius = None
        self.wall_size_x = None
        self.wall_size_y = None
        self.actions = None
        self.x_actions_len = None
        self.num_actions = None
        self.drone_radius = None
        self.orientation = None
        self.ppx = None
        self.D = 0.0
        self.ppx_history = None
        self.prev_ppx = None
        self.ppy = None
        self.ppy_history = None
        self.prev_ppy = None
        self.pvx = None
        self.pvx_history = None
        self.pvy = None
        self.pvy_history = None
        self.tx = None
        self.ty = None
        self.tx_train = None
        self.ty_train = None
        self.t = None
        self.nn_config = nn_config
        self.update_rate = None
        self.predict_obstacle_angle = []
        self.predict_obstacle_orientation = {}
        self.particle_obs_predict = {}
        self.particle_goal_predict = {}
        self.predict_goal_angle = []
        self.predict_goal_angle_noise = []
        self.goal_update_time = None
        self.bad_speed = None
        self.total_meter_x = None
        self.total_meter_y = None
        self.area_pixel_x = None
        self.debug_pixel_x = None
        self.area_pixel_y = None
        self.debug_pixel_y = None
        self.x_start = None
        self.x_end = None
        self.y_start = None
        self.y_end = None
        self.span_x_start = None
        self.span_y_start = None
        self.span_x_end = None
        self.span_y_end = None
        self.num_drones = None
        self.max_sensor_distance = None
        self.push_distance = 0.1
        self.particles_goal = None
        self.particles_obs = None

        self.load_cfg(nn_config, simulation_cfg)
        # print self.max_sensor_distance
        # damping factor
        self.damping_per_tick = 1.0 - self.damping_per_sec / self.update_rate
        self.damping_per_tick = 0.0 if self.damping_per_tick < 0.0 else self.damping_per_tick
        self.brake_per_tick = 1.0 - self.brake_per_sec / self.update_rate
        self.brake_per_tick = 0.0 if self.brake_per_tick < 0.0 else self.brake_per_tick

        self.acceleration_per_tick = self.acceleration * 1.0 / self.update_rate
        self.update_goal_step = self.goal_update_steps
        self.show = show
        self.goal_count = 0
        self.crash_count = 0
        self.reset()
        self.num_states = len(self.get_state(0))
        if show:
            self.frontend = PuckRealMultiFinalFrontend(self)
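
Here the damping and braking coefficients are given per second and converted to multiplicative per-tick factors, clamped so they never go below zero. Applying the factor once per tick approximates an exponential decay per second; a tiny sketch with assumed numbers:

update_rate = 30.0       # ticks per second (assumed)
damping_per_sec = 0.5    # fraction of velocity lost per second (assumed)

damping_per_tick = max(0.0, 1.0 - damping_per_sec / update_rate)

v = 2.0  # m/s
for _ in range(int(update_rate)):   # simulate one second
    v *= damping_per_tick
print(v)  # about v * exp(-damping_per_sec) ~= 1.21 m/s after one second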
Example #6
    def learn(self, environment: Environment, n_episodes: int,
              n_iterations: int):
        avg_rewards = []
        for i in range(n_episodes):
            n_update_iter = 0  # Iterations done this episode; used to gate the early uniform-noise exploration
            curr_state = torch.tensor(environment.reset()).to(
                device=self.actor_network.device).float()
            episode_rewards = []
            while True:
                uniform_noise = False
                if n_update_iter < self.opts.uniform_noise_steps:
                    # Select a random action for early exploration
                    uniform_noise = True
                action = self.act(
                    curr_state, add_noise=True,
                    uniform_noise=uniform_noise).cpu().detach().numpy()
                next_state, reward, done, _ = environment.step(action)
                episode_rewards.append(reward)
                self.exp_buffer.add_experience(
                    curr_state,
                    torch.tensor(action).float(),
                    torch.tensor(reward).float(),
                    torch.tensor(next_state).float(), torch.tensor(done))
                curr_state = torch.tensor(next_state).float().to(
                    self.actor_network.device)
                curr_state.requires_grad = False
                self.opts.noise_epsilon = self.opts.noise_epsilon - self.opts.noise_depsilon
                if done:
                    self.reset()
                    total_episode_reward = np.array(episode_rewards).sum()
                    avg_rewards.append(total_episode_reward)
                    print(
                        "({}/{}) - End of episode with total reward: {} iteration: {}"
                        .format(i, n_episodes, total_episode_reward,
                                n_update_iter))
                    break
                if self.exp_buffer.is_accumulated():  # Do the updates
                    # Sample experiences
                    #self.critic_network.eval()
                    s_states, s_actions, s_rewards, s_next_states, s_done =\
                        self.exp_buffer.sample_tensor(self.opts.exp_batch_size, device=self.actor_network.device, dtype=torch.float32)

                    critic = self.critic_network.forward(
                        s_states, s_actions.detach())
                    target_actions = self.target_actor_network.forward(
                        s_next_states)
                    target_critics = self.target_critic_network.forward(
                        s_next_states, target_actions)
                    target = s_rewards.view(-1, 1) + self.opts.discount * (
                        1 - s_done.view(-1, 1)) * target_critics

                    # Run Gradient Descent on critic network
                    self.critic_optimizer.zero_grad()
                    #self.critic_network.train()  # Enable train mode
                    critic_loss = torch.nn.functional.mse_loss(critic, target)
                    critic_loss.backward()
                    self.critic_optimizer.step()

                    # Run Gradient Ascent on actor network
                    self.actor_optimizer.zero_grad()
                    self.actor_network.train()  # Enable train mode
                    actor_out = self.act(s_states)
                    actor_loss = -self.critic_network(s_states.detach(),
                                                      actor_out)
                    actor_loss = actor_loss.mean()
                    actor_loss.backward()
                    self.actor_optimizer.step()
                    #print(self.actor_network.fc3.weight.grad.mean())
                    self.update_target_networks(0.01)
                n_update_iter += 1  # One iteration is complete
        return avg_rewards
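
Each learning step above ends with update_target_networks(0.01), i.e. a soft (Polyak) update of the target actor and critic with tau = 0.01. That method is not shown in the snippet; a common way to implement it, offered as a sketch rather than the author's exact code, is:

import torch

def soft_update(target_net: torch.nn.Module, source_net: torch.nn.Module, tau: float):
    """target <- tau * source + (1 - tau) * target, parameter by parameter."""
    with torch.no_grad():
        for t_param, s_param in zip(target_net.parameters(), source_net.parameters()):
            t_param.data.mul_(1.0 - tau)
            t_param.data.add_(tau * s_param.data)

# usage sketch with throwaway networks
net = torch.nn.Linear(4, 2)
target = torch.nn.Linear(4, 2)
target.load_state_dict(net.state_dict())  # hard copy at initialisation
soft_update(target, net, tau=0.01)        # slow tracking during training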
Example #7
    def learn(self, env:Environment, trnOpts: TrainOpts):
        device = "cpu"
        if self.opts.use_gpu and torch.cuda.is_available():
            device = "cuda:0"
        
        # Load checkpoint
        all_rewards, avg_rewards = self.load_checkpoint(trnOpts.checkpoint)

        self.multihead_net.to(device)
   
        n_iter = 0
        e = 0
        max_episodes = trnOpts.n_episodes
        max_steps = trnOpts.n_iterations

        while e < max_episodes:  # Looping episodes
            if max_steps > 0 and n_iter > max_steps:
                break
            curr_state = env.reset()
            if type(curr_state) is not torch.Tensor:
                curr_state = torch.from_numpy(curr_state).to(device).float()
            curr_state = curr_state.unsqueeze(0)
            episode_rewards = []
            step = 0
            while True:
                n_iter += 1
                step += 1
                # Collect experience
                # e < self.opts.n_episodes_exploring => This can be added too
                clustering = self.opts.clustering and e < self.opts.n_episodes_exploring and len(self.exp_buffer.clusters) > 0  # Cluster count being higher than 0 means that clustering has been done
                
                with torch.no_grad():
                    if clustering:
                        action = self.act_cluster(curr_state, e)
                    else:
                        action = self.act(curr_state, device)
                    next_state, reward, done, _ = env.step(action)
          
                    if self.opts.render:
                        env.render()
                    episode_rewards.append(reward)

                    if clustering:
                        self.exp_buffer.clusters[self.exp_buffer.last_cluster_id].add_action(action, reward)
                      
                    # Run clustering once enough samples are buffered and it has not been done yet
                    if self.opts.clustering and len(self.exp_buffer) > self.opts.cluster_samples \
                    and len(self.exp_buffer.clusters) == 0:  # no clusters yet means clustering has not run
                        print("Clustering")
                        self.exp_buffer.cluster(self.opts.n_clusters, self.opts.use_elbow_plot)

                if type(next_state) is not torch.Tensor:
                    next_state = torch.from_numpy(next_state).to(device).float()
                next_state = next_state.unsqueeze(0)

                self.exp_buffer.add_experience(curr_state.detach().cpu().squeeze(0), action, reward, next_state.detach().cpu().squeeze(0), done)   
    
                if done:
                   
                    if not self.exp_buffer.is_accumulated(self.opts.exp_batch_size) or (self.opts.clustering and len(self.exp_buffer.states) < self.opts.cluster_samples):
                        print("Accumulating buffer iteration: {}".format(n_iter))
                    
                    else:
                        episode_end_reward = np.array(episode_rewards).sum()
                        all_rewards.append(episode_end_reward)
                        e += 1  # Update episode
                        avg_reward = np.mean(all_rewards[-100:])
                        avg_rewards.append(avg_reward)
                        print("({}/{}) - End of episode with total reward: {} - Avg Reward: {} Total Iter: {}".format(e, max_episodes, episode_end_reward, avg_reward, step))
                    break
                
                curr_state = next_state

                # Learn if enough data is accumulated
                if self.exp_buffer.is_accumulated(self.opts.exp_batch_size):
                    #self.update_params(n_iter, device)
                    start = time.time()
                    self.update_params(n_iter, device)
                    end = time.time()
                    print("Elapsed :{}".format(end-start))
                    
                if n_iter > 0 and self.opts.save_frequency > 0 and n_iter % self.opts.save_frequency == 0:
                    print("Saving at iteration {}".format(n_iter))
                    path = os.path.join(trnOpts.save_path, time.strftime("%Y%m%d-%H%M%S"))

                    self.save_model(path)
                    self.save_rewards(path, all_rewards, avg_rewards)
        
        return all_rewards, avg_rewards
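
This variant clusters the replay buffer once opts.cluster_samples transitions have been collected, then routes exploration through act_cluster() for the first n_episodes_exploring episodes. The exp_buffer.cluster() call is not shown; the sketch below groups buffered states with k-means using scikit-learn as an assumed dependency, which is not necessarily what the original buffer does.

import numpy as np
from sklearn.cluster import KMeans

def cluster_states(states, n_clusters):
    """Group buffered states into n_clusters; returns the fitted model and labels."""
    X = np.stack([np.asarray(s, dtype=np.float32).ravel() for s in states])
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=0).fit(X)
    return kmeans, kmeans.labels_

# usage sketch with random stand-in states
states = [np.random.randn(8) for _ in range(500)]
model, labels = cluster_states(states, n_clusters=4)
print(np.bincount(labels))  # how many buffered states fall into each cluster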
Example #8
    def learn(self, env: Environment, max_episodes: int, max_steps: int):
        device = "cpu"
        if self.opts.use_gpu and torch.cuda.is_available():
            device = "cuda:0"
        self.network.to(device)
        self.target_network.to(device)
        self.reset()
        total_steps = 0
        optimizer = self.opts.optimizer(self.network.parameters(), self.opts.learning_rate)
        avg_rewards = []
        losses = []
        learning_complete = False
        episodes_passed = 0
        while not learning_complete:
            current_step = 0
            target_update_iter = 0
            episode_rewards = []
            curr_state = env.reset()
            action = 0
            if self.opts.use_exp_stack:
                curr_state = self.exp_stack.add_and_get(curr_state)
            #curr_state = torch.tensor(curr_state).to(device).float()
            if episodes_passed > max_episodes:
                learning_complete = True
                break
            while True:
                done = 0
                with torch.no_grad():  # Just collecting experience
                    for i in range(self.opts.exp_stack_size-1):
                        action = self.act(curr_state, device)
                    next_state, reward, done, _ = env.step(self.act_def[action])
                    self.exp_stack.add_state(next_state)
                    total_steps += 1 # Doesn't reset
                    next_state = self.exp_stack.get_stacked_states()
                    episode_rewards.append(reward)
                    self.exp_buffer.add_experience(curr_state, action, reward, next_state, done)
                    curr_state = next_state
                    if self.opts.render:
                        env.render()
        
                if done or current_step > max_steps:
                    self.reset()
                    total_episode_reward = np.array(episode_rewards).sum()
                    avg_rewards.append(total_episode_reward)
                    print("({}/{}) - End of episode with total reward: {} iteration: {} Memory Size: {}".format(episodes_passed, max_episodes, total_episode_reward, current_step, len(self.exp_buffer)))
                    break
                
                if self.exp_buffer.is_accumulated():
                    s_states, s_actions, s_rewards, s_next_states, s_done =\
                    self.exp_buffer.sample_numpy(self.opts.exp_batch_size)

                    # TODO: n-step Q-learning
                    optimizer.zero_grad()
                    with torch.no_grad():
                        s_next_states = torch.from_numpy(s_next_states).to(device).float()
                        s_done = torch.from_numpy(s_done).to(device).float()
                        s_rewards = torch.from_numpy(s_rewards).to(device).float()
                        next_state_vals = self.target_network(s_next_states)*(1-s_done.view(-1,1))  # Terminal states have V(s) = 0; that is why s_done masks them
                        next_state_vals = next_state_vals*self.opts.discount  # Discount the reward
                        td_target = s_rewards + next_state_vals.max(1)[0].detach()  # In TD target, use target network (see Double Q learning)

                    #loss = -self.opts.loss(td_target, self.network(s_states))
                    s_states = torch.from_numpy(s_states).to(device).float()
                    s_actions = torch.from_numpy(s_actions).to(device).to(torch.int64)
                    curr_state_estimations = self.network(s_states).gather(1, s_actions.view(-1,1))
                    loss = torch.nn.functional.mse_loss(curr_state_estimations, td_target.unsqueeze(1))
                    loss.backward()
                    optimizer.step()

                    target_update_iter += 1
                    
                    losses.append(loss.item())
                    # Update target network
                    if target_update_iter > self.opts.target_update_freq:
                        target_update_iter = 0
                        polyak_update(self.target_network, self.network, 1)
                        print("Update target at step {}".format(total_steps))
                    
                if self.opts.verbose and total_steps%self.opts.verbose_frequency == 0 and len(losses) > 0:
                    print("Total Steps:{} - Loss:{} - Curr Epsilon:{}".format(total_steps, losses[-1], self.epsilon))
                current_step += 1  # Resets every episode
            if self.exp_buffer.is_accumulated():
                episodes_passed += 1  # Increment episode only if enough experience is collected
                
            self.epsilon = self.opts.min_epsilon + (self.opts.max_epsilon - self.opts.min_epsilon)*np.exp(-1.0*episodes_passed/self.opts.epsilon_decay) 

        return avg_rewards, losses
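
Exploration is annealed once per completed episode with an exponential schedule: epsilon = min_epsilon + (max_epsilon - min_epsilon) * exp(-episodes_passed / epsilon_decay). A short sketch of how that schedule behaves, with assumed hyper-parameter values:

import numpy as np

min_epsilon, max_epsilon, epsilon_decay = 0.05, 1.0, 50.0  # assumed values

def epsilon_at(episode):
    return min_epsilon + (max_epsilon - min_epsilon) * np.exp(-1.0 * episode / epsilon_decay)

for ep in (0, 10, 50, 100, 200):
    print(ep, round(float(epsilon_at(ep)), 3))
# starts at 1.0, drops to ~0.40 by episode 50 and approaches 0.05 thereafter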