Exemple #1
0
 def __init__(self, config_):
     """
     Set up the instance from the experiment configuration.

     Keeps the configuration, attaches the "cuda_logger" logger and loads
     the city states and the regression models through a DataProvider.

     :param config_: configuration mapping; it must contain a
         'city_state_creator' section whose optional 'filename' entry
         names the city states file (default: 'city_states.dill')
     :return:
     """
     self.config = config_
     self.logger = logging.getLogger("cuda_logger")

     provider = DataProvider(self.config)

     # Fall back to the default file name when the section does not set one
     creator_section = self.config['city_state_creator']
     states_file = creator_section.get('filename', 'city_states.dill')

     self.city_states = provider.read_city_states(states_file)
     self.reg_models = provider.read_regression_models()
Exemple #2
0
    def __init__(self, config_):
        """
        Build the environment from the experiment configuration.

        Loads the city states and the hex bin attributes through a
        DataProvider, reads the driver settings from the 'env_parameters'
        section, defines the action and observation spaces and finally
        calls ``reset()``.

        :param config_: configuration mapping; its 'env_parameters' section
            must provide 'num_drivers' and 'driver_distribution'
        :return:
        """
        self.config = config_
        self.logger = logging.getLogger("gym_logger")
        provider = DataProvider(self.config)

        # City state parameters
        self.city_states = provider.read_city_states()
        self.hex_attr_df = provider.read_hex_bin_attributes()
        self.hex_bins = self.hex_attr_df['hex_id']

        # T: number of time steps, S: number of hex bins
        self.T = len(self.city_states)
        self.S = len(self.hex_bins)

        # Environment parameters
        env_params = self.config['env_parameters']
        self.num_drivers = env_params['num_drivers']
        self.distribution = env_params['driver_distribution']

        # Per-driver bookkeeping, one zero-initialised slot per driver:
        # the next free timestep and the total earnings
        self.next_free_timestep = np.zeros(self.num_drivers)
        self.total_driver_earnings = np.zeros(self.num_drivers)

        # Action space: one component of size 7 for every hex bin
        self.action_space = spaces.MultiDiscrete([7] * self.S)

        # Observation space: the driver distribution, i.e. one component of
        # size num_drivers for every hex bin.
        # NOTE: an earlier variant wrapped this in spaces.Tuple (optionally
        # together with a spaces.Discrete(self.T) time step); the plain
        # MultiDiscrete is what is in use.
        self.observation_space = spaces.MultiDiscrete(
            [self.num_drivers] * self.S)

        self.reset()
    def run(self):
        """
        Creates and runs the training episodes

        The static inputs are loaded once through a DataProvider. Then
        ``num_episodes`` episodes are run one after the other; the tables
        returned by one episode (``q_ind``, ``r_table``, ``xi_matrix``) are
        handed to the next one and every episode's statistics are recorded
        in ``self.training_tracker``.

        :param:
        :return: tuple ``(best_episode, best_model, training_tracker)``.
            ``best_episode`` is the episode tracker with the highest gross
            earnings when ``self.objective == 'revenue'``, otherwise the one
            with the most successful waits (on a tie the later episode
            wins); it is ``None`` when no episode was run.
            ``best_model`` holds the config, the tables, the exploration
            factor and the training tracker as of the last episode run.
        """
        data_provider = DataProvider(self.config)
        hex_attr_df = data_provider.read_hex_bin_attributes()
        hex_distance_df = data_provider.read_hex_bin_distances()
        city_states = data_provider.read_city_states(self.city_states_filename)
        neighborhood = data_provider.read_neighborhood_data()
        popular_bins = data_provider.read_popular_hex_bins()

        rl_parameters = self.config['RL_parameters']
        num_episodes = rl_parameters['num_episodes']
        ind_episodes = rl_parameters['ind_episodes']
        exp_decay_multiplier = rl_parameters['exp_decay_multiplier']

        # Tables carried from episode to episode; None for the first episode
        q_ind = None
        r_table = None
        xi_matrix = None

        best_episode = None
        best_model = {}
        # The running maximum has to live outside the loop: resetting it for
        # every episode would make each episode "the best so far".
        current_best = float('-inf')
        maximize_revenue = self.objective == 'revenue'

        # range() instead of xrange(): xrange only exists on Python 2
        progress_bar = tqdm(range(num_episodes))
        for episode_id in progress_bar:
            progress_bar.set_description("Episode: {}".format(episode_id))

            # Create episode
            # The factor is 1.0 for episode 0 and shrinks with the episode
            # index (for a positive multiplier)
            ind_exploration_factor = np.e ** (-1 * episode_id * exp_decay_multiplier / ind_episodes)

            episode = Episode(self.config,
                              episode_id,
                              ind_exploration_factor,
                              hex_attr_df,
                              hex_distance_df,
                              city_states,
                              neighborhood,
                              popular_bins,
                              q_ind,
                              r_table,
                              xi_matrix)

            # Run episode
            tables = episode.run()
            q_ind = tables['q_ind']
            r_table = tables['r_table']
            xi_matrix = tables['xi_matrix']
            episode_tracker = tables['episode_tracker']

            # Uncomment for logging if running a job, comment during experiments
            # otherwise it leads to insanely huge logging output which is useless

            # self.logger.info("""
            #                  Expt: {} Episode: {} Earnings: {}
            #                  Pax rides: {} Relocation rides: {} Unmet demand: {}
            #                  """.format(self.expt_name, episode_id,
            #                             episode_tracker.gross_earnings,
            #                             episode_tracker.successful_waits,
            #                             episode_tracker.relocation_rides,
            #                             episode_tracker.unmet_demand))
            # self.logger.info("----------------------------------")

            self.training_tracker.update_RL_tracker(
                episode_id, episode_tracker.gross_earnings,
                episode_tracker.successful_waits, episode_tracker.unsuccessful_waits,
                episode_tracker.unmet_demand, episode_tracker.relocation_rides,
                episode_tracker.DET, episode_tracker.DPRT, episode_tracker.DWT,
                episode_tracker.DRT, episode_tracker.DCT)

            # Keep track of the best episode
            if maximize_revenue:
                score = episode_tracker.gross_earnings
            else:  # self.objective == 'pickups'
                score = episode_tracker.successful_waits
            if score >= current_best:
                best_episode = episode_tracker
                current_best = score

            # Keep track of the model
            # NOTE(review): despite its name, best_model is refreshed after
            # every episode, so it describes the most recent episode and not
            # necessarily best_episode. Kept as is because the tables are
            # carried forward between episodes; confirm whether a snapshot
            # of the best episode's tables is wanted instead.
            best_model['ind_exploration_factor'] = ind_exploration_factor
            best_model['config'] = self.config
            best_model['q_ind'] = q_ind
            best_model['r_table'] = r_table
            best_model['xi_matrix'] = xi_matrix
            best_model['training_tracker'] = self.training_tracker

        # After finishing training
        if best_episode is None:
            # Nothing was run (e.g. num_episodes == 0), so there is no
            # episode to summarise
            self.logger.warning(
                "Expt: {} finished without a best episode".format(self.expt_name))
            return best_episode, best_model, self.training_tracker

        self.logger.info("Expt: {} Earnings: {} Met Demand: {} Unmet Demand: {}".format(self.expt_name,
                                                                         best_episode.gross_earnings,
                                                                         best_episode.successful_waits,
                                                                         best_episode.unmet_demand))
        return best_episode, best_model, self.training_tracker
Exemple #4
0
    def run(self):
        """
        Creates and runs a single test episode with a stored model

        Loads the city states and the model named by the
        'city_states_filename' and 'model_filename' entries of
        ``self.test_parameters``, runs one episode with the individual
        exploration factor set to 0.0, records the outcome in
        ``self.testing_tracker`` and logs a summary.

        :param:
        :return: ``self.testing_tracker`` after it has been updated with the
            results of the episode
        """
        data_provider = DataProvider(self.config)
        hex_attr_df = data_provider.read_hex_bin_attributes()
        hex_distance_df = data_provider.read_hex_bin_distances()
        city_states = data_provider.read_city_states(
            self.test_parameters['city_states_filename'])
        model = data_provider.read_model(
            self.test_parameters['model_filename'])
        neighborhood = data_provider.read_neighborhood_data()
        popular_bins = data_provider.read_popular_hex_bins()

        # Tables of the stored model, handed to the episode unchanged
        q_ind = model['q_ind']
        r_table = model['r_table']
        xi_matrix = model['xi_matrix']

        episode_id = 0

        # Create episode
        ind_exploration_factor = 0.0

        # NOTE(review): the trailing positional True is an extra argument
        # compared with the training call; presumably a test-mode flag -
        # confirm against Episode's signature.
        episode = Episode(self.config, episode_id, ind_exploration_factor,
                          hex_attr_df, hex_distance_df, city_states,
                          neighborhood, popular_bins, q_ind, r_table,
                          xi_matrix, True)

        # Run episode; only the tracker is needed afterwards, the returned
        # tables are not used here
        tables = episode.run()
        episode_tracker = tables['episode_tracker']

        self.testing_tracker.update_RL_tracker(
            episode_id, episode_tracker.gross_earnings,
            episode_tracker.successful_waits,
            episode_tracker.unsuccessful_waits, episode_tracker.unmet_demand,
            episode_tracker.relocation_rides, episode_tracker.DET,
            episode_tracker.DPRT, episode_tracker.DWT, episode_tracker.DRT,
            episode_tracker.DCT)

        self.logger.info("""
                         Expt: {} Earnings: {}
                         Model: {}
                         Test day: {}
                         Num drivers: {}
                         Pax rides: {} Relocation rides: {} Unmet demand: {}
                         """.format(
            self.expt_name, episode_tracker.gross_earnings,
            self.test_parameters['model_filename'],
            self.test_parameters['city_states_filename'],
            self.config['RL_parameters']['num_drivers'],
            episode_tracker.successful_waits, episode_tracker.relocation_rides,
            episode_tracker.unmet_demand))
        self.logger.info("----------------------------------")

        return self.testing_tracker
class BaselineDriver(object):
    """
    Runs the configured baseline agents in a process pool and exports the
    per-episode earnings they report.
    """

    def __init__(self, config):
        """
        :param config: experiment configuration; ``run`` reads
            ``config['baselines']['baseline_list']`` from it
        """
        self.config = config
        self.logger = logging.getLogger("baseline_logger")
        self.data_provider = DataProvider(self.config)
        self.data_exporter = DataExporter(self.config)

    @staticmethod
    def run_baseline(baseline_config):
        """
        Runs one baseline agent once and reports its rewards per episode.

        :param baseline_config: dict with the keys 'name' ("cDQN", "cA2C"
            or "A2C"), 'count' (index of this repetition), 'config' and
            'city_states'
        :return: list with one dict per reward returned by the baseline,
            each with the keys 'agent', 'episode', 'run' and 'earnings';
            an empty list when 'name' is not one of the known baselines
        :raises KeyError: when one of the four keys is missing
        """
        baseline_name = baseline_config['name']
        baseline_count = baseline_config['count']
        config = baseline_config['config']
        city_states = baseline_config['city_states']

        # Resolve the class lazily, so only the requested baseline is touched
        if baseline_name == "cDQN":
            baseline_class = cDQN
        elif baseline_name == "cA2C":
            baseline_class = cA2C
        elif baseline_name == "A2C":
            baseline_class = A2C
        else:
            # Unknown names contribute no rows instead of failing the run
            return []

        rewards = baseline_class(config).run(city_states)
        return [{
            'agent': baseline_name,
            'episode': episode,
            'run': baseline_count,
            'earnings': reward
        } for episode, reward in enumerate(rewards)]

    def run(self, num_runs=10):
        """
        Runs every configured baseline ``num_runs`` times and exports the
        collected rewards through the data exporter.

        :param num_runs: number of repetitions per baseline; defaults to 10,
            the value that used to be hard-coded
        :return:
        """
        self.logger.info("Starting baselines")
        city_states = self.data_provider.read_city_states()
        baseline_list = self.config['baselines']['baseline_list']

        # One job per (repetition, baseline) pair
        configs = [{
            'name': name,
            'count': count,
            'config': self.config,
            'city_states': city_states
        } for count in range(num_runs) for name in baseline_list]

        # Create a pool of processes
        num_processes = mp.cpu_count()
        self.logger.info("Processes: {}".format(num_processes))
        pool = ProcessPool(nodes=num_processes)
        try:
            results = pool.amap(self.run_baseline, configs).get()
        finally:
            # Release the pool even when a worker raised
            pool.close()
            pool.join()
            pool.clear()

        # Flatten the per-job lists into one list of rows
        episode_rewards = [row for result in results for row in result]

        self.data_exporter.export_baseline_data(episode_rewards)
        self.logger.info("Finished baselines")