def reset(self):
        print("Calling the reset method! ")
        self.model = Model(
            ZONE_IDS,
            DEMAND_SOURCE,
            WARMUP_TIME_HOUR,
            ANALYSIS_TIME_HOUR,
            FLEET_SIZE=self.config["fleet_size"],
            PRO_SHARE=self.config["pro_s"],
            SURGE_MULTIPLIER=self.config["surge"],
            bonus=self.config["bonus"],
            percent_false_demand=self.config["percent_false_demand"],
            percentage_know_fare=self.config["perc_k"],
            AV_share=self.config["av_share"],
        )

        self.total_reward = 0.0
        self.T = WARMUP_TIME_SECONDS
        self.old_income = 0

        state_n = []
        for veh in self.model.av_vehs:
            state_n.append(self.model.get_state(veh, self.T))

        return state_n
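This reset returns one state per AV vehicle, so a rollout consumes it as a list. A minimal interaction sketch, assuming the multi-agent RebalancingEnv shown in Example 9 below (the random actions are purely illustrative):

env = RebalancingEnv(config)
state_n = env.reset()
done = False
while not done:
    # one target zone per AV; the env treats NaN entries as inaction
    actions = [env.action_space.sample() for _ in state_n]
    state_n, reward, done, info = env.step(actions)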
Example 2
    def reset(self):
        """
        Restarts the gym environment by resetting all parameters to default.
        @return: the modified state.
        """
        print("Calling the reset method! ")
        self.model = Model(
            ZONE_IDS,
            DEMAND_SOURCE,
            WARMUP_TIME_HOUR,
            ANALYSIS_TIME_HOUR,
            FLEET_SIZE=self.config["fleet_size"],
            PRO_SHARE=self.config["pro_s"],
            SURGE_MULTIPLIER=self.config["surge"],
            bonus=self.config["bonus"],
            percent_false_demand=self.config["percent_false_demand"],
            percentage_know_fare=self.config["perc_k"],
        )

        veh = self.model.vehilcs[-1]  # (sic) 'vehilcs' is the attribute name used by this Model version
        veh.is_AV = True
        self.total_reward = 0.0
        self.T = WARMUP_TIME_SECONDS
        self.old_income = 0

        self.update_state()
        # self.amods.append( copy.deepcopy(self.amod) )
        return self.state
    def __init__(self, config):
        super().__init__()
        print("INSIDE INIT FUNCTION")
        print(config["av_share"])
        self.config = config
        self.model = Model(
            ZONE_IDS,
            DEMAND_SOURCE,
            WARMUP_TIME_HOUR,
            ANALYSIS_TIME_HOUR,
            FLEET_SIZE=config["fleet_size"],
            PRO_SHARE=config["pro_s"],
            SURGE_MULTIPLIER=config["surge"],
            bonus=config["bonus"],
            percent_false_demand=config["percent_false_demand"],
            percentage_know_fare=config["perc_k"],
            AV_share=config["av_share"],
        )

        self.dT = INT_REBL
        self.action_space = spaces.Discrete(len(ZONE_IDS))
        # why not define an observation space?
        self.state = np.zeros((len(ZONE_IDS), 3))
        self.observation_space = np.zeros((len(ZONE_IDS), 3))

        # self.center = np.zeros((Mlng, Mlat, 2))
        self.input_dim = 3 * len(ZONE_IDS)
        self.step_count = 0
        self.epi_count = 0
        self.total_reward = 0.0
        self.T = WARMUP_TIME_SECONDS
        self.old_income = 0
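The comment above asks why no observation space is defined; gym expects observation_space to be a gym.spaces.Space instance rather than a plain ndarray. A minimal sketch of what that could look like inside __init__ for the (len(ZONE_IDS), 3) state used here; the bounds are assumptions, since the original specifies none:

from gym import spaces  # already imported wherever spaces.Discrete is used above

# hypothetical unbounded Box matching the (n_zones, 3) state layout
self.observation_space = spaces.Box(low=-np.inf,
                                    high=np.inf,
                                    shape=(len(ZONE_IDS), 3),
                                    dtype=np.float32)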
Example 4
 def reset(self):
     data_instance = Data.init_from_config_dic(config_dict)
     self.model = Model(data_instance)
     self.T = WARMUP_TIME_SECONDS
     # run the warm up period
     for t in range(self.T, self.T + 3600, INT_ASSIGN):
         self.model.dispatch_at_time(t)
     self.T = ANALYSIS_TIME_SECONDS
     print("##########################")
     print("##########################")
     print("End of the warm up time ")
     print("##########################")
     print("##########################")
Example 5
    def __init__(self, config, penalty=-10):
        """

        @param config:
        @param penalty:
        """
        print("INSIDE INIT FUNCTION")
        self.config = config
        self.model = Model(
            ZONE_IDS,
            DEMAND_SOURCE,
            WARMUP_TIME_HOUR,
            ANALYSIS_TIME_HOUR,
            FLEET_SIZE=config["fleet_size"],
            PRO_SHARE=config["pro_s"],
            SURGE_MULTIPLIER=config["surge"],
            bonus=config["bonus"],
            percent_false_demand=config["percent_false_demand"],
            percentage_know_fare=config["perc_k"],
        )

        veh = self.model.vehilcs[-1]  # (sic) 'vehilcs' is the attribute name used by this Model version
        veh.is_AV = True
        # else:
        #     print
        #     self.model = model
        #     self._model_ = copy.deepcopy(model)

        self.dT = INT_REBL
        self.penalty = penalty
        self.action_space = spaces.Discrete(len(ZONE_IDS))
        # why not define an observation space?
        self.state = np.zeros((len(ZONE_IDS), 3))
        # self.center = np.zeros((Mlng, Mlat, 2))
        self.input_dim = 3 * len(ZONE_IDS)
        self.step_count = 0
        self.epi_count = 0
        self.total_reward = 0.0
        self.T = WARMUP_TIME_SECONDS
        self.old_income = 0
def main():
    parser = argparse.ArgumentParser(
        description="Simulation of drivers' behavior")
    # from lib.Constants import PERCE_KNOW
    parser.add_argument(
        '-f',
        '--fleet',
        help=
        'Fleet sizes to simulate, formatted as comma-separated list (i.e. "-f 250,275,300")'
    )
    parser.add_argument(
        '-m',
        '--multiplier',
        help=
        'Surge multiplier, formatted as comma-separated list (i.e. "-m 1,1.5,2")'
    )
    parser.add_argument('-b', '--bonus', type=int, help='Bonus')
    parser.add_argument('-d', '--demand', help='Percent false demand ')
    parser.add_argument('-AV',
                        '--AV_fleet_size',
                        help="Number of Naive drivers ")
    parser.add_argument('-NAIVE',
                        '--NAIVE_fleet_size',
                        help="Number of Naive drivers ")
    parser.add_argument('-PRO',
                        '--PRO_fleet_size',
                        help="Number of Professional drivers ")
    parser.add_argument(
        '-BH',
        '--behavioral_opt',
        help="Perform behavioral optimization, pass 'yes' or 'no' ")
    parser.add_argument('-SURGE',
                        '--surge_pricing',
                        help="should do surge pricing, pass 'yes' or 'no' ")

    parser.add_argument(
        '-k',
        '--know',
        help=
        'Percent knowing fare, formatted as comma-separated list (i.e. "-k 0.2,0.5,0.8")'
    )
    parser.add_argument(
        '-p',
        '--pro',
        help=
        'Percent pro drivers, formatted as comma-separated list (i.e. "-p 0.2,0.5,0.8")'
    )
    parser.add_argument('-r',
                        '--replications',
                        help='number of times to run the simulation')
    parser.add_argument('-bb', '--beta', help='BETA')
    parser.add_argument('-b_policy', '--bonus_policy', help='bonus per zone ')
    parser.add_argument('-budget', '--budget', help='budget ')
    args = parser.parse_args()
    # TODO: argpars should get the bonus policy as input
    data_instance = Data()

    if args.fleet:
        fleet_sizes = [int(args.fleet)]
    else:
        fleet_sizes = data_instance.FLEET_SIZE

    if args.behavioral_opt:
        if args.behavioral_opt.lower() in ('yes', 'true'):
            do_behavioral_opt = True
        else:
            do_behavioral_opt = False
    else:
        do_behavioral_opt = data_instance.do_behavioral_opt

    if args.surge_pricing:
        if args.surge_pricing.lower() in ('yes', 'true'):
            do_surge_pricing = True
        else:
            do_surge_pricing = False
    else:
        do_surge_pricing = data_instance.do_surge_pricing

    if args.PRO_fleet_size:
        set_of_NUM_OF_PRO_DRIVERS = [int(args.PRO_fleet_size)]
    else:
        set_of_NUM_OF_PRO_DRIVERS = [data_instance.PRO_FLEET_SIZE]

    if args.NAIVE_fleet_size:
        set_of_NUM_OF_NAIVE_DRIVERS = [int(args.NAIVE_fleet_size)]
    else:
        set_of_NUM_OF_NAIVE_DRIVERS = [data_instance.NAIVE_FLEET_SIZE]

    if args.AV_fleet_size:
        set_of_NUM_OF_AV_DRIVERS = [int(args.AV_fleet_size)]
    else:
        set_of_NUM_OF_AV_DRIVERS = [data_instance.AV_FLEET_SIZE]

    if args.multiplier:
        # surge = args.multiplier
        surges = [float(x) for x in args.multiplier.split(',')]
    else:
        surges = [data_instance.SURGE_MULTIPLIER]

    if args.know:
        perc_know = [float(args.know)]
    else:
        perc_know = [data_instance.PERCE_KNOW]

    if args.bonus:
        bonus = args.bonus
    else:
        bonus = data_instance.BONUS
    if args.beta:
        beta = float(args.beta)
    else:
        beta = configs_dict["BETA"]

    if args.pro:
        pro_share = [float(x) for x in args.pro.split(',')]
    else:
        pro_share = [data_instance.PRO_SHARE]

    if args.demand:
        percent_false_demand = float(args.demand)
    else:
        percent_false_demand = data_instance.PERCENT_FALSE_DEMAND
    if args.replications:
        n_rep = int(args.replications)
    else:
        n_rep = 1
    if args.bonus_policy:
        bonus_policy = args.bonus_policy
    else:
        bonus_policy = data_instance.BONUS_POLICY
    if args.budget:
        budget = args.budget
    else:
        budget = data_instance.BUDGET
    # output_path = "./Outputs/avg_fare_info/" + str(beta) + "/"

    from lib.rebalancing_optimizer import RebalancingOpt

    for num_pros in set_of_NUM_OF_PRO_DRIVERS:
        for num_naives in set_of_NUM_OF_NAIVE_DRIVERS:
            for num_avs in set_of_NUM_OF_AV_DRIVERS:
                for surge in surges:
                    for repl in range(n_rep):
                        TOTAL_FLEET_SIZE = 2500
                        # the total fleet is fixed, so the naive count is derived from
                        # the pro count, overriding the value from the loop above
                        num_naives = TOTAL_FLEET_SIZE - num_pros
                        data_instance.AV_FLEET_SIZE = num_avs
                        data_instance.NAIVE_FLEET_SIZE = num_naives
                        data_instance.PRO_FLEET_SIZE = num_pros
                        data_instance.do_behavioral_opt = do_behavioral_opt
                        data_instance.do_surge_pricing = do_surge_pricing

                        if do_behavioral_opt:
                            st = '/with_behavioral_opt/'
                        else:
                            st = '/no_behavioral_opt/'
                        output_path = "./Outputs/" + datetime.datetime.now().strftime("%Y-%m-%d-%H-%M") + \
                                      st + str('Pro_') + str(num_pros) \
                                      + str('NAIVE_') + str(num_naives) \
                                      + str('AV_') + str(num_avs) \
                                      + str('budget_') + str(budget) + "_" \
                                      + str("bonus_policy") + "_" + str(bonus_policy) + "_" \
                                      + str('do_surge') + "_" + str(data_instance.do_surge_pricing) + "_" \
                                      + str('do_opt') + "_" + str(data_instance.do_behavioral_opt) + "_" \
                                      + str(datetime.datetime.now()).split('.')[0] + "/"

                        if not os.path.exists(output_path):
                            os.makedirs(output_path)

                        print("iteration number ", repl)
                        print('Surge is {}'.format(surge))

                        data_instance.SURGE_MULTIPLIER = surge
                        data_instance.BONUS = bonus
                        data_instance.output_path = output_path

                        # data_instance.do_behavioral_opt = False
                        m = Model(data_instance, configs_dict, beta,
                                  output_path)
                        # start time
                        stime = time.time()
                        # dispatch the system for T_TOTAL seconds, at the interval of INT_ASSIGN
                        # TODO: every run should in include the policy from the start
                        # TODO: process Feb's month as well.
                        months = [1, 2]
                        days = [30, 15]
                        stop_month = months[-1]
                        for ix, month in enumerate(months):
                            for d_idx in range(1, days[ix]):
                                stop_day = days[ix]

                                if month == 1 and d_idx >= 15:
                                    # NOTE: THIS WILL NOT HAVE THE DESIRED EFFECT, BC OPERATOR has attribute set in the beginning
                                    data_instance.do_behavioral_opt = True
                                    m.operator.do_behavioral_opt = True

                                    # data_instance.do_surge_pricing = True
                                    # m.operator.do_surge_pricing = True

                                for T in range(
                                        data_instance.WARMUP_TIME_SECONDS,
                                        data_instance.T_TOTAL_SECONDS,
                                        data_instance.INT_ASSIGN):
                                    m.dispatch_at_time(T, day_idx=d_idx)
                                m.get_service_rate_per_zone(d_idx, month)
                                m.get_drivers_earnings_for_one_day(
                                    d_idx, month)
                                m.get_operators_earnings_for_one_day(
                                    d_idx, month)
                                print(
                                    f"Finished day number {d_idx + 1} of month {month}; starting a new day"
                                )
                                print(f"it took {(time.time() - stime) / 60:.1f} minutes so far")
                                m.reset_after_one_day_of_operation(
                                    stop_month, stop_day)

                        if num_pros > 0:
                            all_dfs = pd.concat([
                                v.report_learning_rates() for v in m.vehicles
                                if v.driver_type == DriverType.PROFESSIONAL
                            ],
                                                ignore_index=True)
                            all_dfs.to_csv(output_path +
                                           "fmean for all drivers.csv")

                            all_fare_reliability_dfs = pd.concat(
                                [
                                    v.report_fare_reliability_evolution()
                                    for v in m.vehicles
                                    if v.driver_type == DriverType.PROFESSIONAL
                                ],
                                ignore_index=True)
                            all_fare_reliability_dfs.to_csv(
                                output_path +
                                "fare reliability for all drivers.csv")

                            all_m_reliability_dfs = pd.concat(
                                [
                                    v.report_matching_reliability_evolution()
                                    for v in m.vehicles
                                    if v.driver_type == DriverType.PROFESSIONAL
                                ],
                                ignore_index=True)
                            all_m_reliability_dfs.to_csv(
                                output_path +
                                "matching reliability for all drivers.csv")

                            all_fare_reliability_dfs = pd.concat(
                                [
                                    v.report_surge_bonus_behavior()
                                    for v in m.vehicles
                                    if v.driver_type == DriverType.PROFESSIONAL
                                ],
                                ignore_index=True)
                            all_fare_reliability_dfs.to_csv(
                                output_path +
                                "surge behavior for all drivers.csv")

                        all_earning_dfs = pd.concat(
                            [v.report_final_earnings() for v in m.vehicles],
                            ignore_index=True)
                        all_earning_dfs.to_csv(output_path +
                                               "earnings for all drivers.csv")
                        operators_revenue = m.operator.report_final_revenue()
                        operators_revenue.to_csv(output_path +
                                                 "operators_revenue.csv")

                        print('Total drivers: ', len(m.vehicles))
                        print(
                            '# of Pro drivers: ',
                            len([
                                v for v in m.vehicles
                                if v.driver_type == DriverType.PROFESSIONAL
                            ]))
                        print(
                            '# of naive drivers: ',
                            len([
                                v for v in m.vehicles
                                if v.driver_type == DriverType.NAIVE
                            ]))
                        print(
                            '# of inexperienced drivers: ',
                            len([
                                v for v in m.vehicles
                                if v.driver_type == DriverType.INEXPERIENCED
                            ]))
                        # end time
                        etime = time.time()
                        # run time of this simulation
                        runtime = etime - stime
                        print("The run time was {runtime} minutes ".format(
                            runtime=runtime / 60))

                        report = m.report_final_performance()

                        # drop the decimal point so that e.g. surge 1.5 appears as "15" in the filename
                        ss = str(surge).replace('.', '')

                        fleet_size = num_avs + num_pros + num_naives
                        report.to_csv(output_path + "report for fleet size " +
                                      str(fleet_size) + " surge " + str(ss) +
                                      "pro_ " + str(num_pros) + "naive_ " +
                                      str(num_naives) + "AV_" + str(num_avs) +
                                      " repl" + str(repl) + ".csv")
Example 7
class BonusEnv(MultiAgentEnv):
    def __init__(self, env_config):
        # data_instance = Data.init_from_config_dic(config_dict)
        # self.model = Model(data_instance)
        # self.T = WARMUP_TIME_SECONDS
        self.min_action = 0
        self.max_action = config_dict["MAX_BONUS"]
        self.SHARED_REWARD = env_config.get("SHARED_REWARD", False)

        # define action/state space per zone
        # https://github.com/openai/gym/wiki/Table-of-environments
        # https://github.com/openai/gym/blob/master/gym/envs/classic_control/continuous_mountain_car.py
        # https://github.com/openai/multiagent-particle-envs/blob/master/bin/interactive.py
        self.action_space = spaces.Box(low=self.min_action,
                                       high=self.max_action,
                                       shape=(1, ),
                                       dtype=np.float32)

        self.reset()

    def step(self, action_n):
        """
        for z in zones: set the bonus value
        action_n is {z_id:bonus}
        instead of the operator, the env sets the bonus values
        """
        for zone in self.model.zones:
            requested_bonus = action_n[zone.id]
            # make sure it is within bounds
            requested_bonus = min(max(requested_bonus, self.min_action),
                                  self.max_action)
            if requested_bonus <= self.model.budget:
                self.model.budget -= requested_bonus
                zone.set_bonus(requested_bonus)

        # simulate for a while (e.g., 10 mins)
        for t in range(self.T, self.T + POLICY_UPDATE_INTERVAL, INT_ASSIGN):
            self.model.dispatch_at_time(t)

        # observe the next state
        states = self._get_states()
        # observe the (global) reward
        reward = self._get_reward_unserved()
        # other info
        info = self._get_info()
        # update the clock
        self.T += POLICY_UPDATE_INTERVAL
        # done flag
        done = self._is_done()

        return states, reward, done, info

    def reset(self):
        data_instance = Data.init_from_config_dic(config_dict)
        self.model = Model(data_instance)
        self.T = WARMUP_TIME_SECONDS
        # run the warm up period
        for t in range(self.T, self.T + 3600, INT_ASSIGN):
            self.model.dispatch_at_time(t)
        self.T = ANALYSIS_TIME_SECONDS
        print("##########################")
        print("##########################")
        print("End of the warm up time ")
        print("##########################")
        print("##########################")

    def _get_states(self):
        """
        Placeholder; should return info per zone.

        :return:
        """
        demand, supply = self.model.get_both_supply_and_demand_per_zone(self.T)
        return demand

    def _get_stats(self):
        self.stats = [z.generate_performance_stats() for z in self.model.zones]

    def _get_reward(self):
        if self.SHARED_REWARD:
            rew = np.sum([
                z.reward_dict[np.ceil(self.T / POLICY_UPDATE_INTERVAL)]
                for z in self.model.zones
            ])
            return {z.id: rew for z in self.model.zones}
        else:
            return {z.id: 1 for z in self.model.zones}

    def _get_reward_unserved(self):
        return np.sum([
            z.generate_performance_stats(self.T)[3] for z in self.model.zones
        ])

    def _get_info(self):
        return None

    def render(self, mode='human'):
        pass

    def _is_done(self):
        # >= rather than == so the episode also ends if the clock overshoots the horizon
        return self.T >= T_TOTAL_SECONDS
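A minimal interaction sketch for BonusEnv, assuming config_dict and the constants imported in Example 14 below are in scope; the per-zone random bonuses are purely illustrative:

env = BonusEnv(env_config={"SHARED_REWARD": True})
states = env.reset()  # re-runs the warm-up period and returns the initial observation
done = False
while not done:
    # one bonus per zone, drawn from the Box [min_action, max_action]
    action_n = {z.id: float(env.action_space.sample()) for z in env.model.zones}
    states, reward, done, info = env.step(action_n)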
Example 8
class RebalancingEnv(gym.Env):
    """
    RebalancingEnv is the environment class for DQN
    Attributes:
        model: AMoD system to train
        dT: time interval for training
        penalty: penalty of rebalancing a vehicle
        action_space: action space
        state: the system state. It's (ui, vi, cik) for every zone, where cik is the cost of going to i. e.g., 67 zones -> 67  * 3.
        center: the centroid of cells
        input_dim: input dimension
    """
    def __init__(self, config, penalty=-10):
        """

        @param config:
        @param penalty:
        """
        print("INSIDE INIT FUNCTION")
        self.config = config
        self.model = Model(
            ZONE_IDS,
            DEMAND_SOURCE,
            WARMUP_TIME_HOUR,
            ANALYSIS_TIME_HOUR,
            FLEET_SIZE=config["fleet_size"],
            PRO_SHARE=config["pro_s"],
            SURGE_MULTIPLIER=config["surge"],
            bonus=config["bonus"],
            percent_false_demand=config["percent_false_demand"],
            percentage_know_fare=config["perc_k"],
        )

        veh = self.model.vehilcs[-1]  # (sic) 'vehilcs' is the attribute name used by this Model version
        veh.is_AV = True
        # else:
        #     print
        #     self.model = model
        #     self._model_ = copy.deepcopy(model)

        self.dT = INT_REBL
        self.penalty = penalty
        self.action_space = spaces.Discrete(len(ZONE_IDS))
        # why not define an observation space?
        self.state = np.zeros((len(ZONE_IDS), 3))
        # self.center = np.zeros((Mlng, Mlat, 2))
        self.input_dim = 3 * len(ZONE_IDS)
        self.step_count = 0
        self.epi_count = 0
        self.total_reward = 0.0
        self.T = WARMUP_TIME_SECONDS
        self.old_income = 0

    def step(self, action):
        """
        Performs one step of the environment.

        @param action: a vector of length N_AV, which contains the target zone for idle veh, and inaction for busy ones
        implements action, returns new state, reward.
        @return: observed state, reward, flag

        @note: Currently the DQN is inside the model.dispatch_at_time function
        """
        flag = False
        self.step_count += 1
        reward = 0
        # AV
        veh = self.model.vehilcs[-1]
        # As long as a decision for AV is not needed, keep simulating
        while not veh.should_move():
            T = self.T
            T_ = self.T + INT_ASSIGN
            # dispatch the system for INT_ASSIGN seconds
            while T < T_:
                self.model.dispatch_at_time(T, self.penalty)
                T += INT_ASSIGN
            self.T = self.T + INT_ASSIGN
        # the AV is now ready to move; dispatch with the chosen action
        print("AV should move ")
        T = self.T
        T_ = self.T + INT_ASSIGN
        # move it
        while T < T_:
            self.model.dispatch_at_time(T, self.penalty, action)
            T += INT_ASSIGN
        self.T = self.T + INT_ASSIGN
        # calculate the reward of that action
        total_new_income = np.sum(veh.profits) - self.old_income
        self.old_income = np.sum(veh.profits)
        reward += total_new_income

        self.update_state()

        # print("T_TOTAL_SECONDS",T_TOTAL_SECONDS)
        # print("self.T", self.T)
        if self.T >= T_TOTAL_SECONDS:
            flag = True
            print("Episode is done!")
        return self.state, reward, flag, {}

    def update_state(self, vid=-1):
        """
        Updates the state to be the state of a vehicle.

        @param vid: "vehicle list index" that chooses a vehicle for which to get the state.
        @return: state of the vehicle
        """
        veh = self.model.vehilcs[vid]
        self.state = self.model.get_state(veh)

    def reset(self):
        """
        Restarts the gym environment by resetting all parameters to default.
        @return: the modified state.
        """
        print("Calling the reset method! ")
        self.model = Model(
            ZONE_IDS,
            DEMAND_SOURCE,
            WARMUP_TIME_HOUR,
            ANALYSIS_TIME_HOUR,
            FLEET_SIZE=self.config["fleet_size"],
            PRO_SHARE=self.config["pro_s"],
            SURGE_MULTIPLIER=self.config["surge"],
            bonus=self.config["bonus"],
            percent_false_demand=self.config["percent_false_demand"],
            percentage_know_fare=self.config["perc_k"],
        )

        veh = self.model.vehilcs[-1]
        veh.is_AV = True
        self.total_reward = 0.0
        self.T = WARMUP_TIME_SECONDS
        self.old_income = 0

        self.update_state()
        # self.amods.append( copy.deepcopy(self.amod) )
        return self.state
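A minimal random-policy rollout for this single-AV RebalancingEnv; the config values are placeholder assumptions, not defaults from the original project:

config = {"fleet_size": 1500, "pro_s": 0, "surge": 1,
          "bonus": 0, "percent_false_demand": 0, "perc_k": 0}
env = RebalancingEnv(config, penalty=-10)
state = env.reset()
done = False
while not done:
    action = env.action_space.sample()  # a random target zone for the AV
    state, reward, done, info = env.step(action)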
Example 9
def main():
    """
    Parses command line arguments, sets training environment parameters, creates deep Q-network and trains it
    on gym environment.
    """
    parser = argparse.ArgumentParser(
        description="Simulation of drivers' behavior")
    parser.add_argument(
        '-f',
        '--fleet',
        help=
        'Fleet sizes to simulate, formatted as comma-separated list (i.e. "-f 250,275,300")'
    )
    parser.add_argument(
        '-m',
        '--multiplier',
        help=
        'Surge multiplier, formatted as comma-separated list (i.e. "-m 1,1.5,2")'
    )
    parser.add_argument('-b', '--bonus', type=int, help='Bonus')
    parser.add_argument('-d', '--demand', help='Percent false demand ')
    parser.add_argument(
        '-k',
        '--know',
        help=
        'Percent knowing fare, formatted as comma-separated list (i.e. "-k 0.2,0.5,0.8")'
    )
    parser.add_argument(
        '-p',
        '--pro',
        help=
        'Percent pro drivers, formatted as comma-separated list (i.e. "-p 0.2,0.5,0.8")'
    )
    parser.add_argument(
        '-av',
        '--av',
        help=
        'Percent AV drivers, formatted as comma-separated list (i.e. "-av 0.2,0.5,0.8")'
    )
    parser.add_argument('-nb', '--nb', help='number of steps to train the RL agent')

    args = parser.parse_args()
    if args.fleet:
        fleet_sizes = [int(x) for x in args.fleet.split(',')]
    #        fleet_sizes = args.fleet
    else:
        fleet_sizes = FLEET_SIZE

    if args.multiplier:
        # surge = args.multiplier
        surges = [float(x) for x in args.multiplier.split(',')]
    else:
        surges = [SURGE_MULTIPLIER]

    if args.know:
        # surge = args.multiplier
        perc_know = [float(x) for x in args.know.split(',')]
    else:
        perc_know = [PERCE_KNOW]

    if args.bonus:
        bonus = args.bonus
    else:
        bonus = BONUS

    if args.pro:

        pro_share = [float(x) for x in args.pro.split(',')]
    else:
        pro_share = [PRO_SHARE]

    if args.demand:
        percent_false_demand = float(args.demand)
    else:
        percent_false_demand = PERCENT_FALSE_DEMAND

    if args.av:
        av_share = [float(x) for x in args.av.split(',')]
    else:
        av_share = [1]
    if args.nb:
        nb_steps = int(args.nb)
    else:
        nb_steps = 300

    for fleet_size in fleet_sizes:
        for surge in surges:
            for perc_k in perc_know:
                for pro_s in pro_share:
                    m = Model(ZONE_IDS,
                              DEMAND_SOURCE,
                              WARMUP_TIME_HOUR,
                              ANALYSIS_TIME_HOUR,
                              fleet_size=fleet_size,
                              pro_share=pro_s,
                              surge_multiplier=surge,
                              bonus=bonus,
                              percent_false_demand=percent_false_demand,
                              percentage_know_fare=perc_k)

                    # make one veh to be AV
                    veh = m.vehilcs[-1]  # (sic) 'vehilcs' is the attribute name used by this Model version
                    veh.is_AV = True
                    #
                    env = RebalancingEnv(m, penalty=-0)

                    nb_actions = env.action_space.n
                    input_shape = (1, ) + env.state.shape
                    input_dim = env.input_dim

                    model = Sequential()
                    model.add(Flatten(input_shape=input_shape))
                    model.add(Dense(256, activation='relu'))
                    model.add(Dense(nb_actions, activation='linear'))

                    memory = SequentialMemory(limit=2000, window_length=1)
                    policy = EpsGreedyQPolicy()
                    dqn = DQNAgent(model=model,
                                   nb_actions=nb_actions,
                                   memory=memory,
                                   nb_steps_warmup=100,
                                   target_model_update=1e-2,
                                   policy=policy,
                                   gamma=.99)
                    dqn.compile(Adam(lr=0.001, epsilon=0.05, decay=0.0),
                                metrics=['mae'])

                    dqn.fit(env,
                            nb_steps=nb_steps,
                            action_repetition=1,
                            visualize=False,
                            verbose=2)
                    dqn.save_weights('new_dqn_weights_%s.h5f' % (nb_steps),
                                     overwrite=True)
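After training, the keras-rl agent can be evaluated on the same environment. A short sketch; the episode count is arbitrary:

dqn.load_weights('new_dqn_weights_%s.h5f' % nb_steps)
dqn.test(env, nb_episodes=5, visualize=False)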
class RebalancingEnv(gym.Env):
    """
    RebalancingEnv is the environment class for DQN
    Attributes:
        model: AMoD system to train
        dT: time interval for training
        penalty: penalty of rebalancing a vehicle
        action_space: action space
        state: the system state. It's (ui, vi, cik) for every zone, where cik is the cost of going to i. e.g., 67 zones -> 67  * 3.
        center: the centroid of cells
        input_dim: input dimension
    """
    def __init__(self, config):
        super().__init__()
        print("INSIDE INIT FUNCTION")
        print(config["av_share"])
        self.config = config
        self.model = Model(
            ZONE_IDS,
            DEMAND_SOURCE,
            WARMUP_TIME_HOUR,
            ANALYSIS_TIME_HOUR,
            FLEET_SIZE=config["fleet_size"],
            PRO_SHARE=config["pro_s"],
            SURGE_MULTIPLIER=config["surge"],
            bonus=config["bonus"],
            percent_false_demand=config["percent_false_demand"],
            percentage_know_fare=config["perc_k"],
            AV_share=config["av_share"],
        )

        self.dT = INT_REBL
        self.action_space = spaces.Discrete(len(ZONE_IDS))
        # why not define an observation space?
        self.state = np.zeros((len(ZONE_IDS), 3))
        self.observation_space = np.zeros((len(ZONE_IDS), 3))

        # self.center = np.zeros((Mlng, Mlat, 2))
        self.input_dim = 3 * len(ZONE_IDS)
        self.step_count = 0
        self.epi_count = 0
        self.total_reward = 0.0
        self.T = WARMUP_TIME_SECONDS
        self.old_income = 0

    def step(self, actions):
        """ 
        actions: a vector of length N_AV, which contains the target zone for idle veh, 
        and inaction for busy ones
        impelements action, returns new state, reward. 
        Currently the DQN is inside the model.dispatch_at_time function 
        """
        # print("Inside Step")
        # print("Step count: ", self.step_count)
        # print("T: ", self.T)
        flag = False
        self.step_count += 1
        for i, veh in enumerate(self.model.av_vehs):
            # if the veh has to move, then move it
            if not np.isnan(actions[i]):
                veh.set_action(actions[i])

        # move the world forward
        self.model.dispatch_at_time(self.T)
        self.T = self.T + INT_ASSIGN
        # print("end T: ", self.T)

        state_n = []
        for veh in self.model.av_vehs:
            state_n.append(self.model.get_state(veh, self.T))

        # total_new_income = np.sum(veh.profits) - self.old_income
        # self.old_income = np.sum(veh.profits)
        # # normalize the reward.
        # # from previous runs, avg revenue is 35 with std of 5
        # # (base on Nuts and bolts of DRL)
        # normalized_income = (total_new_income ) #/10
        # reward = normalized_income
        # print("reward")
        # print(reward)
        # total_new_income = np.sum(model.operator.revenues) - self.old_income
        # self.old_income = np.sum(model.operator.revenues)
        # reward += total_new_income
        # report = self.model.get_service_rate_per_zone()
        # system_LOS = report.served.sum()/report.total.sum()
        # reward += system_LOS
        # self.T = self.T+INT_ASSIGN

        print("T_TOTAL_SECONDS", T_TOTAL_SECONDS)
        print("self.T", self.T)

        if self.T >= T_TOTAL_SECONDS:
            flag = True
            print("Episode is done!")

        return state_n, None, flag, {}

    def reset(self):
        print("Calling the reset method! ")
        self.model = Model(
            ZONE_IDS,
            DEMAND_SOURCE,
            WARMUP_TIME_HOUR,
            ANALYSIS_TIME_HOUR,
            FLEET_SIZE=self.config["fleet_size"],
            PRO_SHARE=self.config["pro_s"],
            SURGE_MULTIPLIER=self.config["surge"],
            bonus=self.config["bonus"],
            percent_false_demand=self.config["percent_false_demand"],
            percentage_know_fare=self.config["perc_k"],
            AV_share=self.config["av_share"],
        )

        self.total_reward = 0.0
        self.T = WARMUP_TIME_SECONDS
        self.old_income = 0

        state_n = []
        for veh in self.model.av_vehs:
            state_n.append(self.model.get_state(veh, self.T))

        return state_n
def main():
    parser = argparse.ArgumentParser(
        description="Simulation of drivers' behavior")
    parser.add_argument(
        '-f',
        '--fleet',
        help=
        'Fleet sizes to simulate, formatted as comma-separated list (i.e. "-f 250,275,300")'
    )
    parser.add_argument(
        '-m',
        '--multiplier',
        help=
        'Surge multiplier, formatted as comma-separated list (i.e. "-m 1,1.5,2")'
    )
    parser.add_argument('-b', '--bonus', type=int, help='Bonus')
    parser.add_argument('-d', '--demand', help='Percent false demand ')
    parser.add_argument(
        '-k',
        '--know',
        help=
        'Percent knowing fare, formatted as comma-separated list (i.e. "-k 0.2,0.5,0.8")'
    )
    parser.add_argument(
        '-p',
        '--pro',
        help=
        'Percent pro drivers, formatted as comma-separated list (i.e. "-p 0.2,0.5,0.8")'
    )
    parser.add_argument('-r',
                        '--replications',
                        help='number of times to run the simulation')
    parser.add_argument('-bb', '--beta', help='BETA')
    parser.add_argument('-b_policy', '--bonus_policy', help='bonus per zone ')
    parser.add_argument('-budget', '--budget', help='budget ')
    args = parser.parse_args()
    # TODO: argpars should get the bonus policy as input
    data_instance = Data()
    if args.fleet:
        fleet_sizes = [int(args.fleet)]
    else:
        fleet_sizes = data_instance.FLEET_SIZE

    if args.multiplier:
        # surge = args.multiplier
        surges = [float(x) for x in args.multiplier.split(',')]
    else:
        surges = [data_instance.SURGE_MULTIPLIER]

    if args.know:
        perc_know = [float(args.know)]
    else:
        perc_know = [data_instance.PERCE_KNOW]

    if args.bonus:
        bonus = args.bonus
    else:
        bonus = data_instance.BONUS
    if args.beta:
        beta = float(args.beta)
    else:
        beta = configs_dict["BETA"]

    if args.pro:
        pro_share = [float(x) for x in args.pro.split(',')]
    else:
        pro_share = [data_instance.PRO_SHARE]

    if args.demand:
        percent_false_demand = float(args.demand)
    else:
        percent_false_demand = data_instance.PERCENT_FALSE_DEMAND
    if args.replications:
        n_rep = int(args.replications)
    else:
        n_rep = 1
    if args.bonus_policy:
        bonus_policy = args.bonus_policy
    else:
        bonus_policy = data_instance.BONUS_POLICY
    if args.budget:
        budget = args.budget
    else:
        budget = data_instance.BUDGET
    # output_path = "./Outputs/avg_fare_info/" + str(beta) + "/"

    for fleet_size in fleet_sizes:
        for surge in surges:
            for perc_k in perc_know:
                for pro_s in pro_share:
                    for repl in range(n_rep):
                        output_path = "./Outputs/avg_fare_info/" + str(budget) + "_" + str(bonus_policy) + "_" + \
                                      str(datetime.datetime.now()).split('.')[0] + "/"
                        if not os.path.exists(output_path):
                            os.makedirs(output_path)
                        print("iteration number ", repl)
                        print('Fleet size is {f}'.format(f=fleet_size))
                        print('Surge is {}'.format(surge))
                        print('Percentage knowing fares is {}'.format(perc_k))
                        print('Percentage of professional drivers {}'.format(
                            pro_s))

                        data_instance.FLEET_SIZE = fleet_size
                        data_instance.PRO_SHARE = pro_s
                        data_instance.SURGE_MULTIPLIER = surge
                        data_instance.BONUS = bonus
                        data_instance.PERCENT_FALSE_DEMAND = percent_false_demand
                        data_instance.PERCE_KNOW = perc_k

                        m = Model(data_instance, configs_dict, beta)

                        # start time
                        stime = time.time()

                        # # dispatch the system for T_TOTAL seconds, at the interval of INT_ASSIGN
                        for T in range(data_instance.WARMUP_TIME_SECONDS,
                                       data_instance.T_TOTAL_SECONDS,
                                       data_instance.INT_ASSIGN):
                            m.dispatch_at_time(T)
                        print('Total drivers: ', len(m.vehicles))
                        print(
                            '# of Pro drivers: ',
                            len([
                                v for v in m.vehicles
                                if v.driver_type == DriverType.PROFESSIONAL
                            ]))
                        print(
                            '# of naive drivers: ',
                            len([
                                v for v in m.vehicles
                                if v.driver_type == DriverType.NAIVE
                            ]))
                        print(
                            '# of inexperienced drivers: ',
                            len([
                                v for v in m.vehicles
                                if v.driver_type == DriverType.INEXPERIENCED
                            ]))
                        # end time
                        etime = time.time()
                        # run time of this simulation
                        runtime = etime - stime
                        print("The run time was {runtime} minutes ".format(
                            runtime=runtime / 60))

                        report = m.get_service_rate_per_zone()

                        # drop the decimal point so that e.g. surge 1.5 appears as "15" in the filename
                        ss = str(surge).replace('.', '')

                        report.to_csv(output_path + "report for fleet size " +
                                      str(fleet_size) + " surge " + str(ss) +
                                      "fdemand= " + str(percent_false_demand) +
                                      "perc_k " + str(perc_k) + "pro_s " +
                                      str(pro_s) + " repl" + str(repl) +
                                      ".csv")
Example 12
def main():

    parser = argparse.ArgumentParser(
        description="Simulation of drivers' behavior")
    parser.add_argument(
        "-f",
        "--fleet",
        help=
        'Fleet sizes to simulate, formatted as comma-separated list (i.e. "-f 250,275,300")',
    )
    parser.add_argument(
        "-m",
        "--multiplier",
        help=
        'Surge multiplier, formatted as comma-separated list (i.e. "-m 1,1.5,2")',
    )
    parser.add_argument("-b", "--bonus", type=int, help="Bonus")
    parser.add_argument("-d", "--demand", help="Percent false demand ")
    parser.add_argument(
        "-k",
        "--know",
        help=
        'Percent knowing fare, formatted as comma-separated list (i.e. "-k 0.2,0.5,0.8")',
    )
    parser.add_argument(
        "-p",
        "--pro",
        help=
        'Percent pro drivers, formatted as comma-separated list (i.e. "-p 0.2,0.5,0.8")',
    )
    parser.add_argument("-r",
                        "--replications",
                        help="number of times to run the simulation")
    args = parser.parse_args()
    if args.fleet:
        fleet_sizes = [int(x) for x in args.fleet.split(",")]
    else:
        fleet_sizes = FLEET_SIZE

    if args.multiplier:
        # surge = args.multiplier
        surges = [float(x) for x in args.multiplier.split(",")]
    else:
        surges = [SURGE_MULTIPLIER]

    if args.know:
        # surge = args.multiplier
        perc_know = [float(x) for x in args.know.split(",")]
    else:
        perc_know = [PERCE_KNOW]

    if args.bonus:
        bonus = args.bonus
    else:
        bonus = BONUS

    if args.pro:
        pro_share = [float(x) for x in args.pro.split(",")]
    else:
        pro_share = [PRO_SHARE]

    if args.demand:
        percent_false_demand = float(args.demand)
    else:
        percent_false_demand = PERCENT_FALSE_DEMAND
    if args.replications:
        n_rep = int(args.replications)
    else:
        n_rep = 1

    for fleet_size in fleet_sizes:
        for surge in surges:
            for perc_k in perc_know:
                for pro_s in pro_share:
                    for repl in range(n_rep):
                        print("iteration number ", repl)
                        print("Fleet size is {f}".format(f=fleet_size))
                        print("Surge is {}".format(surge))
                        print("Percentage knowing fares is {}".format(perc_k))
                        print("Percentage of professional drivers {}".format(
                            pro_s))

                        m = Model(
                            ZONE_IDS,
                            DEMAND_SOURCE,
                            WARMUP_TIME_HOUR,
                            ANALYSIS_TIME_HOUR,
                            fleet_size=fleet_size,
                            pro_share=pro_s,
                            surge_multiplier=surge,
                            bonus=bonus,
                            percent_false_demand=percent_false_demand,
                            percentage_know_fare=perc_k,
                        )

                        # start time
                        stime = time.time()

                        # # dispatch the system for T_TOTAL seconds, at the interval of INT_ASSIGN
                        for T in range(WARMUP_TIME_SECONDS, T_TOTAL_SECONDS,
                                       INT_ASSIGN):

                            m.dispatch_at_time(T)

                        # end time
                        etime = time.time()
                        # run time of this simulation
                        runtime = etime - stime
                        print("The run time was {runtime} minutes ".format(
                            runtime=runtime / 60))

                        m.runtime = runtime
                        report = m.get_service_rate_per_zone()

                        # drop the decimal point so that e.g. surge 1.5 appears as "15" in the filename
                        ss = str(surge).replace(".", "")

                        report.to_csv(output_path + "report for fleet size " +
                                      str(fleet_size) + " surge " + str(ss) +
                                      "fdemand= " + str(percent_false_demand) +
                                      "perc_k " + str(perc_k) + "pro_s " +
                                      str(pro_s) + " repl" + str(repl) +
                                      ".csv")

                        pickle.dump(
                            m,
                            open(
                                output_path + "model for fleet size " +
                                str(fleet_size) + " surge " + str(ss) +
                                "fdemand " + str(percent_false_demand) +
                                "perc_k " + str(perc_k) + "pro_s " +
                                str(pro_s) + " repl" + str(repl) + ".p",
                                "wb",
                            ),
                        )
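The pickled Model can be reloaded later for post-hoc analysis. A sketch, where model_path is a hypothetical variable that must match the filename written above:

# model_path is hypothetical; build it the same way as in the pickle.dump call
with open(model_path, "rb") as f:
    m = pickle.load(f)
print(m.runtime)
print(m.get_service_rate_per_zone().head())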
Example 13
def main():
    print("Start of main()")
    # TODO: all these should be cleaned up like this:
    #  https://github.com/sisl/MADRL/blob/master/madrl_environments/walker/train_multi_walker.py
    parser = argparse.ArgumentParser(description="Simulation of drivers' behavior")
    parser.add_argument('-f', '--fleet',
                        help='Fleet sizes to simulate, formatted as comma-separated list (i.e. "-f 250,275,300")')
    parser.add_argument('-m', '--multiplier',
                        help='Surge multiplier, formatted as comma-separated list (i.e. "-m 1,1.5,2")')
    parser.add_argument('-b', '--bonus', type=int,
                        help='Bonus')
    parser.add_argument('-d', '--demand',
                        help='Percent false demand ')
    parser.add_argument('-k', '--know',
                        help='Percent knowing fare, formatted as comma-separated list (i.e. "-k 0.2,0.5,0.8")')
    parser.add_argument('-p', '--pro',
                        help='Percent pro drivers, formatted as comma-separated list (i.e. "-p 0.2,0.5,0.8")')
    parser.add_argument('-r', '--replications',
                        help='number of times to run the simulation')
    parser.add_argument('-bb', '--beta',
                        help='BETA')
    parser.add_argument('-b_policy', '--bonus_policy',
                        help='bonus per zone ')
    parser.add_argument('-budget', '--budget',
                        help='budget ')
    args = parser.parse_args()
    # TODO: argpars should get the bonus policy as input
    print("instantiate Data object")
    data_instance = Data()
    if args.fleet:
        fleet_sizes = [int(args.fleet)]
    else:
        fleet_sizes = data_instance.FLEET_SIZE

    if args.multiplier:
        # surge = args.multiplier
        surges = [float(x) for x in args.multiplier.split(',')]
    else:
        surges = [data_instance.SURGE_MULTIPLIER]

    if args.know:
        perc_know = [float(args.know)]
    else:
        perc_know = [data_instance.PERCE_KNOW]

    if args.bonus:
        bonus = args.bonus
    else:
        bonus = data_instance.BONUS
    if args.beta:
        beta = float(args.beta)
    else:
        beta = config_dict["BETA"]

    if args.pro:
        pro_share = [float(x) for x in args.pro.split(',')]
    else:
        pro_share = [data_instance.PRO_SHARE]

    if args.demand:
        percent_false_demand = float(args.demand)
    else:
        percent_false_demand = data_instance.PERCENT_FALSE_DEMAND
    if args.replications:
        n_rep = int(args.replications)
    else:
        n_rep = 5
    if args.bonus_policy:
        bonus_policy = args.bonus_policy
    else:
        bonus_policy = data_instance.BONUS_POLICY
    if args.budget:
        budget = args.budget
    else:
        budget = data_instance.BUDGET
    # output_path = "./Outputs/avg_fare_info/" + str(beta) + "/"

    for fleet_size in fleet_sizes:
        for surge in surges:
            for perc_k in perc_know:
                for pro_s in pro_share:
                    for repl in range(n_rep):
                        # output_path = "./Outputs/avg_fare_info/" + str(budget) + "_" + str(bonus_policy) + "_" + \
                        #               str(datetime.datetime.now()).split('.')[0] + "/"
                        # if not os.path.exists(output_path):
                        #     os.makedirs(output_path)
                        print("iteration number ", repl)
                        print('Fleet size is {f}'.format(f=fleet_size))
                        print('Surge is {}'.format(surge))
                        print('Percentage knowing fares is {}'.format(perc_k))
                        print('Percentage of professional drivers {}'.format(pro_s))

                        data_instance.FLEET_SIZE = fleet_size
                        data_instance.PRO_SHARE = pro_s
                        data_instance.SURGE_MULTIPLIER = surge
                        data_instance.BONUS = bonus
                        data_instance.PERCENT_FALSE_DEMAND = percent_false_demand
                        data_instance.PERCE_KNOW = perc_k
                        print("Instantiated the model")
                        m = Model(data_instance, beta)

                        # start time
                        stime = time.time()

                        # # dispatch the system for T_TOTAL seconds, at the interval of INT_ASSIGN
                        for T in range(data_instance.WARMUP_TIME_SECONDS,
                                       data_instance.T_TOTAL_SECONDS,
                                       data_instance.INT_ASSIGN):
                            m.dispatch_at_time(T)

                        # end time
                        etime = time.time()
                        # run time of this simulation
                        runtime = etime - stime
                        print("The run time was {runtime} minutes ".format(runtime=runtime / 60))

                        m.save_zonal_stats("../performance_stats/")
Example 14
import numpy as np
from lib.Data import Data
from lib.configs import config_dict
from lib.utils import Model
from lib.Constants import POLICY_UPDATE_INTERVAL, WARMUP_TIME_SECONDS, T_TOTAL_SECONDS, INT_ASSIGN, \
    ANALYSIS_TIME_SECONDS, DEMAND_UPDATE_INTERVAL
import time
data_instance = Data.init_from_config_dic(config_dict)
m = Model(data_instance)

print('Fleet size is {f}'.format(f=data_instance.FLEET_SIZE))

stime = time.time()

# # dispatch the system for T_TOTAL seconds, at the interval of INT_ASSIGN
for T in range(data_instance.WARMUP_TIME_SECONDS,
               data_instance.T_TOTAL_SECONDS,
               data_instance.INT_ASSIGN):
    m.dispatch_at_time(T)

# end time
etime = time.time()
# run time of this simulation
runtime = etime - stime
print("The run time was {runtime} minutes ".format(runtime=runtime / 60))

m.save_zonal_stats("../performance_stats/")