def __init__(self, idx, args, barrier, netdata, rl_stats, exp_replays, eps,
             offset):
    """Initialise the process and keep hold of everything it was given.

    Each argument is stored unchanged on the instance under its own name.
    In addition a ``SumoSim`` is constructed from ``args.cfg_fp``,
    ``args.sim_len``, ``args.tsc``, ``args.nogui``, ``netdata``, ``args``
    and ``idx`` and stored as ``self.sim``.
    """
    # The Process base class must be initialised before anything else.
    Process.__init__(self)

    # Build the simulator first; it only depends on the call arguments.
    simulator = SumoSim(args.cfg_fp, args.sim_len, args.tsc, args.nogui,
                        netdata, args, idx)

    self.idx, self.args = idx, args
    self.barrier, self.netdata = barrier, netdata
    self.sim = simulator
    self.rl_stats, self.exp_replays = rl_stats, exp_replays
    self.eps, self.offset = eps, offset

    # Starts out True; nothing in this constructor changes it afterwards.
    self.initial = True
def __init__(self, idx, args, barrier, netdata, rl_stats, exp_replays, eps,
             offset):
    """Initialise the process, tracing progress on stdout.

    Prints a marker line, initialises the ``Process`` base class, stores
    the call arguments on the instance, prints a second marker and then
    builds the ``SumoSim`` held in ``self.sim``.
    """
    print('[mylog][simproc.py][20] class SimProc(Process)__init ...')
    Process.__init__(self)

    # Attributes assigned before the simulator is created.
    for attr_name, attr_value in (
            ('idx', idx),
            ('args', args),
            ('barrier', barrier),
            ('netdata', netdata),
    ):
        setattr(self, attr_name, attr_value)

    print(
        '[mylog][simproc.py][26] self.sim = SumoSim(args.cfg_fp, $$$ ...')
    self.sim = SumoSim(args.cfg_fp, args.sim_len, args.tsc, args.nogui,
                       netdata, args, idx)

    # Attributes assigned after the simulator is created.
    for attr_name, attr_value in (
            ('rl_stats', rl_stats),
            ('exp_replays', exp_replays),
            ('eps', eps),
            ('offset', offset),
            ('initial', True),
    ):
        setattr(self, attr_name, attr_value)
def __init__(self, args, tsc, mode):
    """Create the simulation and learner processes for one run.

    The worker counts on ``args`` are first normalised for the requested
    controller and mode. A throwaway simulation is then run once to
    complete ``netdata``, the shared RL containers are created, and all
    worker processes are collected in ``self.procs`` (simulation processes
    first, then learner processes).

    Args:
        args: Run configuration. It is mutated in place: ``l`` and ``n``
            may be adjusted, ``nreplay`` is divided by ``nsteps``, and
            ``cfg_fp``/``net_fp`` are replaced when ``args.sim`` is truthy.
        tsc: Name of the traffic signal controller. ``'ddpg'`` and
            ``'dqn'`` are treated as RL controllers; ``'websters'``,
            ``'maxpressure'``, ``'sotl'`` and ``'uniform'`` as traditional.
        mode: ``'train'`` or ``'test'``; only consulted for RL controllers.

    Note:
        ``tsc`` and ``mode`` are only used for the worker-count checks;
        everything afterwards reads ``args.tsc`` and ``args.mode``.
        For an unknown ``tsc`` a message is printed, ``self.procs`` is left
        empty and no processes are created.
    """
    self.args = args
    rl_tsc = ('ddpg', 'dqn')
    traditional_tsc = ('websters', 'maxpressure', 'sotl', 'uniform')

    # Different controller families need different learner counts.
    if tsc in rl_tsc:
        if mode == 'train':
            # Training needs actors and at least one learner.
            if args.l < 1:
                args.l = 1
        elif mode == 'test':
            # No learners are necessary for testing.
            if args.l > 0:
                args.l = 0
    elif tsc in traditional_tsc:
        # Traditional controllers do not require learners.
        if args.l > 0:
            args.l = 0
    else:
        print('Input argument tsc ' + str(tsc) +
              ' not found, please provide valid tsc.')
        # Leave the object in a usable state instead of without ``procs``.
        self.procs = []
        return

    # Ensure at least one sim, otherwise there is no point in running.
    # (``< 1`` rather than ``< 0`` so that a count of zero is also fixed.)
    if args.n < 1:
        args.n = 1

    # If a named sim was provided use it to look up the cfg and net paths,
    # otherwise continue with the defaults already on ``args``.
    if args.sim:
        args.cfg_fp, args.net_fp = get_sim(args.sim)

    args.nreplay = int(args.nreplay / args.nsteps)

    # One barrier party per simulation process and per learner process.
    barrier = Barrier(args.n + args.l)

    netdata = NetworkData(args.net_fp).get_net_data()

    # Run a dummy sim (index -1, GUI disabled) to get the tsc data needed
    # for creating the neural networks.
    dummy_sim = SumoSim(args.cfg_fp, args.sim_len, args.tsc, True, netdata,
                        args, -1)
    dummy_sim.gen_sim()
    netdata = dummy_sim.update_netdata()
    dummy_sim.close()

    tsc_ids = netdata['inter'].keys()

    # Containers shared between processes: RL stats and experience replays.
    rl_stats = self.create_mp_stats_dict(tsc_ids)
    exp_replays = self.create_mp_exp_replay(tsc_ids)

    eps_rates = self.get_exploration_rates(args.eps, args.n, args.mode,
                                           args.sim)
    print(eps_rates)
    offsets = self.get_start_offsets(args.mode, args.sim_len, args.offset,
                                     args.n)
    print(offsets)

    # Sumo sim procs generate the experiences.
    sim_procs = [
        SimProc(i, args, barrier, netdata, rl_stats, exp_replays,
                eps_rates[i], offsets[i])
        for i in range(args.n)
    ]

    # Learner procs are each assigned tsc/rl agents to compute neural net
    # updates for.
    if args.l > 0:
        learner_agents = self.assign_learner_agents(tsc_ids, args.l)
        print('===========LEARNER AGENTS')
        for agent_group in learner_agents:
            print('============== ' + str(agent_group))
        learner_procs = [
            LearnerProc(i, args, barrier, netdata, learner_agents[i],
                        rl_stats, exp_replays)
            for i in range(args.l)
        ]
    else:
        learner_procs = []

    self.procs = sim_procs + learner_procs
    print("self procs:\t", self.procs)
class SimProc(Process):
    """Process that owns one ``SumoSim`` and runs it in train or test mode.

    In ``'train'`` mode simulations are repeated until every intersection
    has received ``args.updates`` updates; in ``'test'`` mode a single
    simulation is run and its metrics are saved.
    """

    def __init__(self, idx, args, barrier, netdata, rl_stats, exp_replays,
                 eps, offset):
        """Store the shared objects and build this process's simulator.

        Args:
            idx: Index of this process; used in log messages and passed
                to ``SumoSim``.
            args: Run configuration object.
            barrier: Barrier this process waits on in ``run``/``run_sim``.
            netdata: Network description; ``netdata['inter']`` is keyed by
                traffic signal controller id.
            rl_stats: Per-controller stats; ``['updates']`` and
                ``['online']`` are read by this class.
            exp_replays: Per-controller experience replay containers.
            eps: Value handed to ``SumoSim.create_tsc``; also used to name
                the CSV file (presumably the exploration rate - confirm).
            offset: Simulation offset run before the first real simulation.
        """
        print('[mylog][simproc.py][20] class SimProc(Process)__init ...')
        super().__init__()
        self.idx = idx
        self.args = args
        self.barrier = barrier
        self.netdata = netdata
        print(
            '[mylog][simproc.py][26] self.sim = SumoSim(args.cfg_fp, $$$ ...')
        self.sim = SumoSim(args.cfg_fp, args.sim_len, args.tsc, args.nogui,
                           netdata, args, idx)
        self.rl_stats = rl_stats
        self.exp_replays = exp_replays
        self.eps = eps
        self.offset = offset
        # True until the first simulation has run its start offset.
        self.initial = True

    def _logs_sim_stats(self):
        """Return True when this process should append stats to its CSV.

        NOTE(review): 1.0 and < 0.02 look like the fully random and the
        near-greedy exploration rates - confirm against the caller.
        """
        return self.eps == 1.0 or self.eps < 0.02

    def run(self):
        """Process entry point: build networks, sync, then simulate."""
        learner = False
        # Saved weights are only loaded when testing with ``args.load`` set.
        # ``== True`` is kept deliberately so truthy non-bool values behave
        # exactly as before.
        load = bool(self.args.load == True  # noqa: E712
                    and self.args.mode == 'test')

        neural_networks = gen_neural_networks(self.args,
                                              self.netdata,
                                              self.args.tsc,
                                              self.netdata['inter'].keys(),
                                              learner,
                                              load,
                                              self.args.n_hidden)

        print('sim proc ' + str(self.idx) + ' waiting at barrier ---------')
        write_to_log(' ACTOR #' + str(self.idx) +
                     ' WAITING AT SYNC WEIGHTS BARRIER...')
        self.barrier.wait()
        write_to_log(' ACTOR #' + str(self.idx) + ' BROKEN SYNC BARRIER...')

        # When learners exist and we are training, take the weights they
        # published in ``rl_stats``.
        if self.args.l > 0 and self.args.mode == 'train':
            neural_networks = self.sync_nn_weights(neural_networks)

        if self.args.mode == 'train':
            while not self.finished_updates():
                self.run_sim(neural_networks)
                if self._logs_sim_stats():
                    self.write_to_csv(self.sim.sim_stats())
                self.sim.close()
        elif self.args.mode == 'test':
            print(
                str(self.idx) + ' test waiting at offset ------------- ' +
                str(self.offset))
            print(
                str(self.idx) + ' test broken offset =================== ' +
                str(self.offset))
            # Skip the offset phase of ``run_sim`` when testing.
            self.initial = False
            # Just run one sim for stats.
            self.run_sim(neural_networks)
            if self._logs_sim_stats():
                self.write_to_csv(self.sim.sim_stats())
                # Separator row after the test run's stats.
                with open(str(self.eps) + '.csv', 'a+') as f:
                    f.write('-----------------\n')
            self.write_sim_tsc_metrics()
            self.sim.close()

        print('------------------\nFinished on sim process ' +
              str(self.idx) + ' Closing\n---------------')

    def run_sim(self, neural_networks):
        """Generate and run one simulation with the given networks.

        On the very first call (``self.initial`` is True) the simulation is
        first advanced by ``self.offset`` and the process then waits on the
        barrier before continuing.
        """
        start_t = time.time()
        self.sim.gen_sim()

        if self.initial is True:
            # Initial sim: run until the offset time is reached.
            self.initial = False
            self.sim.run_offset(self.offset)
            print(
                str(self.idx) + ' train waiting at offset ------------- ' +
                str(self.offset) + ' at ' + str(get_time_now()))
            write_to_log(' ACTOR #' + str(self.idx) +
                         ' FINISHED RUNNING OFFSET ' + str(self.offset) +
                         ' to time ' + str(self.sim.t) +
                         ' , WAITING FOR OTHER OFFSETS...')
            self.barrier.wait()
            print(
                str(self.idx) + ' train broken offset =================== ' +
                str(self.offset) + ' at ' + str(get_time_now()))
            write_to_log(' ACTOR #' + str(self.idx) +
                         ' BROKEN OFFSET BARRIER...')

        self.sim.create_tsc(self.rl_stats, self.exp_replays, self.eps,
                            neural_networks)
        write_to_log('ACTOR #' + str(self.idx) + ' START RUN SIM...')
        self.sim.run()
        print('sim finished in ' + str(time.time() - start_t) + ' on proc ' +
              str(self.idx))
        write_to_log('ACTOR #' + str(self.idx) + ' FINISHED SIM...')

    def write_sim_tsc_metrics(self):
        """Save every controller metric and the travel times of the sim.

        Metrics go to ``metrics/<tsc>/<metric>/<tsc id>/`` and travel times
        to ``metrics/<tsc>/traveltime/``; file names start with the value
        returned by ``get_time_now()``.
        """
        # Dict of all tsc in the sim, each holding a dict of its metrics.
        tsc_metrics = self.sim.get_tsc_metrics()
        fname = get_time_now()
        path = 'metrics/' + str(self.args.tsc)

        for tsc in tsc_metrics:
            for m in tsc_metrics[tsc]:
                mpath = path + '/' + str(m) + '/' + str(tsc) + '/'
                check_and_make_dir(mpath)
                save_data(mpath + fname + '_' + str(self.eps) + '_.p',
                          tsc_metrics[tsc][m])

        travel_times = self.sim.get_travel_times()
        path += '/traveltime/'
        check_and_make_dir(path)
        save_data(path + fname + '.p', travel_times)

    '''
    def write_ep_return(self):
        #if rl, only print returns of best
        fname = get_time_now()
        #write all metrics to correct path
        path = 'metrics/'+str(self.args.tsc)+'/returns/'
        check_and_make_dir(path)
        save_data( path+(self.self.eps)'_'+fname+'.p', self.sim.get_tsc_returns())
    '''

    def write_to_csv(self, data):
        """Append ``data`` as one comma-separated row to ``<eps>.csv``.

        Args:
            data: Iterable of fields. Each field is converted with ``str``
                so numeric values are accepted as well as strings.
        """
        with open(str(self.eps) + '.csv', 'a+') as f:
            f.write(','.join(str(field) for field in data) + '\n')

    '''
    def exp_replay_full(self):
        for tsc in self.netdata['inter'].keys():
            if len(self.exp_replays[tsc]) < self.args.nreplay:
                print(tsc+' exp replay size '+str(len(self.exp_replays[tsc])))
                print(tsc+' updates '+str(self.rl_stats[tsc]['updates']))
                return False
        return True
    '''

    def finished_updates(self):
        """Return True once every controller has reached ``args.updates``.

        Prints the replay size and update count of each controller it
        inspects and stops at the first one that is still short.
        """
        for tsc in self.netdata['inter'].keys():
            print(tsc + ' exp replay size ' +
                  str(len(self.exp_replays[tsc])))
            print(tsc + ' updates ' + str(self.rl_stats[tsc]['updates']))
            if self.rl_stats[tsc]['updates'] < self.args.updates:
                return False
        return True

    def sync_nn_weights(self, neural_networks):
        """Copy the ``'online'`` weights from ``rl_stats`` into the networks.

        Args:
            neural_networks: Mapping from controller id to its network
                (``'dqn'``) or to a dict holding an ``'actor'`` network
                (``'ddpg'``).

        Returns:
            The same ``neural_networks`` object, updated in place.

        Raises:
            AssertionError: If ``args.tsc`` is neither ``'ddpg'`` nor
                ``'dqn'`` and there is at least one network to sync.
        """
        for tsc_id in neural_networks:
            weights = self.rl_stats[tsc_id]['online']
            if self.args.tsc == 'ddpg':
                # Only the actor weights are synced for ddpg.
                neural_networks[tsc_id]['actor'].set_weights(weights,
                                                             'online')
            elif self.args.tsc == 'dqn':
                neural_networks[tsc_id].set_weights(weights, 'online')
            else:
                # Raised explicitly so the check survives ``python -O``.
                raise AssertionError(
                    'Supplied RL traffic signal controller ' +
                    str(self.args.tsc) + ' does not exist.')
        return neural_networks

    '''
    def get_neural_networks(self, tsctype, tsc_ids):
        neural_nets = {}
        if tsctype == 'dqn' or tsctype == 'ddpg':
            for tsc in tsc_ids:
                input_d, output_d = get_in_out_d(tsctype, len(self.netdata['inter'][tsc]['incoming_lanes']), len(self.netdata['inter'][tsc]['green_phases']))
                learner = False
                neural_nets[tsc] = nn_factory(self.args.tsc, input_d, output_d, self.args, learner)
        return neural_nets
    '''