def train(args):
    """Run a complete training session and, optionally, a follow-up evaluation.

    Reads ``args.base_dir``, ``args.config_dir`` and ``args.test_mode``.
    The config file is parsed with ``configparser``; the environment, step
    counter and agent are built from it, the trainer is run, and the final
    model is saved under the run's model directory. When the test mode
    requests a post-training test, an ``Evaluator`` is run afterwards.
    """
    root = args.base_dir
    paths = init_dir(root)
    init_log(paths['log'])

    cfg_path = args.config_dir
    copy_file(cfg_path, paths['data'])
    cfg = configparser.ConfigParser()
    cfg.read(cfg_path)
    in_test, post_test = init_test_flag(args.test_mode)

    # Environment.
    env = init_env(cfg['ENV_CONFIG'])
    env_dims = (env.n_a, env.n_agent)
    logging.info('Training: a dim %d, agent dim: %d' % env_dims)

    # Step settings are read as floats (getfloat accepts scientific notation
    # such as 1e6) and truncated to int.
    total_step, test_step, log_step = [
        int(cfg.getfloat('TRAIN_CONFIG', option))
        for option in ('total_step', 'test_interval', 'log_interval')
    ]
    global_counter = Counter(total_step, test_step, log_step)

    # Agent (centralized or multi-agent, as decided by init_agent).
    seed = cfg.getint('ENV_CONFIG', 'seed')
    model = init_agent(env, cfg['MODEL_CONFIG'], total_step, seed)

    # Training runs in this thread only: multi-threading is disabled for a
    # safe SUMO implementation.
    summary_writer = tf.summary.FileWriter(paths['log'])
    trainer = Trainer(
        env,
        model,
        global_counter,
        summary_writer,
        in_test,
        output_path=paths['data'],
    )
    trainer.run()

    # Persist the model at whatever step the counter stopped on.
    final_step = global_counter.cur_step
    logging.info('Training: save final model at step %d ...' % final_step)
    model.save(paths['model'], final_step)

    # Nothing more to do unless a post-training test was requested.
    if not post_test:
        return
    test_dirs = init_dir(root, pathes=['eva_data'])
    evaluator = Evaluator(env, model, test_dirs['eva_data'])
    evaluator.run()
def train(args):
    """Train an IDDPG or MADDPG agent as described by ``args``.

    Reads ``args.base_dir``, ``args.config_dir`` and ``args.test_mode``.
    The config file is parsed with ``configparser``; the environment, step
    counter and model are built from it, the trainer is run, and the final
    model is saved under the run's model directory.

    Raises:
        ValueError: if ``env.agent`` is neither ``'iddpg'`` nor ``'maddpg'``.
            Previously this case left ``model`` unbound and failed later with
            an ``UnboundLocalError`` when the trainer was constructed.
    """
    base_dir = args.base_dir
    dirs = init_dir(base_dir)
    init_log(dirs['log'])
    config_dir = args.config_dir
    copy_file(config_dir, dirs['data'])
    config = configparser.ConfigParser()
    config.read(config_dir)
    # Post-training testing is not wired up in this variant, so only the
    # in-training flag is used.
    in_test, _post_test = init_test_flag(args.test_mode)

    # init env
    env = init_env(config['ENV_CONFIG'])
    logging.info('Training: s dim: %d, a dim %d, s dim ls: %r, a dim ls: %r' %
                 (env.n_s, env.n_a, env.n_s_ls, env.n_a_ls))

    # init step counter; values are read as floats (getfloat accepts
    # scientific notation such as 1e6) and truncated to int
    total_step = int(config.getfloat('TRAIN_CONFIG', 'total_step'))
    test_step = int(config.getfloat('TRAIN_CONFIG', 'test_interval'))
    log_step = int(config.getfloat('TRAIN_CONFIG', 'log_interval'))
    global_counter = Counter(total_step, test_step, log_step)

    # init centralized or multi agent
    seed = config.getint('ENV_CONFIG', 'seed')
    if env.agent == 'iddpg':
        model = IDDPG(env.n_s_ls, env.n_a_ls, env.n_w_ls, total_step,
                      config['MODEL_CONFIG'], seed=seed)
    elif env.agent == 'maddpg':
        # TODO: Add MADDPG
        model = MADDPG(env.n_s_ls, env.n_a_ls, env.n_w_ls, env.n_f_ls,
                       total_step, config['MODEL_CONFIG'], seed=seed)
    else:
        # Fail here with a clear message instead of hitting an unbound
        # `model` a few lines further down.
        raise ValueError('Unsupported agent type: %r' % (env.agent,))

    summary_writer = tf.summary.FileWriter(dirs['log'])
    trainer = Trainer(env, model, global_counter, summary_writer, in_test,
                      output_path=dirs['data'])
    trainer.run()

    # save model
    final_step = global_counter.cur_step
    logging.info('Training: save final model at step %d ...' % final_step)
    model.save(dirs['model'], final_step)
def train(args):
    """Train an IA2C, MA2C or IQL agent, optionally test it, then save it.

    Reads ``args.base_dir``, ``args.config_dir`` and ``args.test_mode``.
    The agent class is chosen from ``env.agent``: ``'ia2c'`` and ``'ma2c'``
    select the matching A2C model, ``'iqld'`` selects IQL with the ``'dqn'``
    model type, and every other value falls back to IQL with the ``'lr'``
    model type. If a post-training test was requested it runs before the
    final model is saved.
    """
    out_dirs = init_dir(args.base_dir)
    init_log(out_dirs['log'])

    cfg_path = args.config_dir
    copy_file(cfg_path, out_dirs['data'])
    cfg = configparser.ConfigParser()
    cfg.read(cfg_path)
    in_test, post_test = init_test_flag(args.test_mode)

    # Environment.
    env = init_env(cfg['ENV_CONFIG'])
    dims = (env.n_s, env.n_a, env.n_s_ls, env.n_a_ls)
    logging.info('Training: s dim: %d, a dim %d, s dim ls: %r, a dim ls: %r' % dims)

    # Step settings are read as floats (getfloat accepts scientific notation
    # such as 1e6) and truncated to int.
    total_step, test_step, log_step = [
        int(cfg.getfloat('TRAIN_CONFIG', option))
        for option in ('total_step', 'test_interval', 'log_interval')
    ]
    global_counter = Counter(total_step, test_step, log_step)

    # Agent selection.
    seed = cfg.getint('ENV_CONFIG', 'seed')
    agent_kind = env.agent
    if agent_kind == 'ia2c':
        model = IA2C(env.n_s_ls, env.n_a_ls, env.n_w_ls, total_step,
                     cfg['MODEL_CONFIG'], seed=seed)
    elif agent_kind == 'ma2c':
        model = MA2C(env.n_s_ls, env.n_a_ls, env.n_w_ls, env.n_f_ls, total_step,
                     cfg['MODEL_CONFIG'], seed=seed)
    else:
        # Both IQL flavours share every argument except the model type.
        # NOTE(review): seed is fixed to 0 here rather than taken from the
        # config -- confirm this is intended.
        iql_type = 'dqn' if agent_kind == 'iqld' else 'lr'
        model = IQL(env.n_s_ls, env.n_a_ls, env.n_w_ls, total_step,
                    cfg['MODEL_CONFIG'], seed=0, model_type=iql_type)

    # Training and testing run sequentially in this thread: multi-threading
    # is disabled for a safe SUMO implementation.
    summary_writer = tf.summary.FileWriter(out_dirs['log'])
    trainer = Trainer(
        env,
        model,
        global_counter,
        summary_writer,
        in_test,
        output_path=out_dirs['data'],
    )
    trainer.run()

    # Post-training test, on the same environment used for training.
    if post_test:
        tester = Tester(env, model, global_counter, summary_writer,
                        out_dirs['data'])
        tester.run_offline(out_dirs['data'])

    # Persist the model at whatever step the counter stopped on.
    final_step = global_counter.cur_step
    logging.info('Training: save final model at step %d ...' % final_step)
    model.save(out_dirs['model'], final_step)
def train(args):
    """Train the agent selected by ``env.agent`` and save the final model.

    Reads ``args.base_dir``, ``args.config_dir`` and ``args.test_mode``.
    Recognised agent names are ``'ia2c'``, ``'ma2c'``, ``'codql'`` (MFQ),
    ``'dqn'``, ``'ddpg'`` (DDPGEN) and ``'iqld'`` (IQL with the ``'dqn'``
    model type); every other value falls back to IQL with the ``'lr'`` model
    type. The ``'codql'`` branch prints the dimensions it derives before
    building the model.
    """
    out_dirs = init_dir(args.base_dir)
    init_log(out_dirs['log'])

    cfg_path = args.config_dir
    copy_file(cfg_path, out_dirs['data'])
    cfg = configparser.ConfigParser()
    cfg.read(cfg_path)
    # post_test is returned by init_test_flag but not used in this variant.
    in_test, post_test = init_test_flag(args.test_mode)

    # Environment.
    env = init_env(cfg['ENV_CONFIG'])
    dims = (env.n_s, env.n_a, env.n_s_ls, env.n_a_ls)
    logging.info('Training: s dim: %d, a dim %d, s dim ls: %r, a dim ls: %r' % dims)

    # Step settings are read as floats (getfloat accepts scientific notation
    # such as 1e6) and truncated to int.
    total_step, test_step, log_step = [
        int(cfg.getfloat('TRAIN_CONFIG', option))
        for option in ('total_step', 'test_interval', 'log_interval')
    ]
    global_counter = Counter(total_step, test_step, log_step)

    # Agent selection.
    seed = cfg.getint('ENV_CONFIG', 'seed')
    agent_kind = env.agent
    if agent_kind == 'ia2c':
        model = IA2C(env.n_s_ls, env.n_a_ls, env.n_w_ls, total_step,
                     cfg['MODEL_CONFIG'], seed=seed)
    elif agent_kind == 'ma2c':
        model = MA2C(env.n_s_ls, env.n_a_ls, env.n_w_ls, env.n_f_ls, total_step,
                     cfg['MODEL_CONFIG'], seed=seed)
    elif agent_kind == 'codql':
        print('This is codql')
        agent_count = len(env.n_s_ls)
        print('num_agents:', agent_count)
        # First entry of the per-agent action list.
        # NOTE(review): the original author was unsure whether this is a
        # dimension or a count -- confirm.
        action_dim = env.n_a_ls[0]
        print('a_dim:', action_dim)
        state_dim = env.n_s_ls[0]
        print('env.n_s_ls=', state_dim)
        wait_dim = env.n_w_ls[0]
        print('s_dim_wait:', wait_dim)
        # NOTE(review): the original author flagged a possible state
        # dimension error around here -- confirm the split below.
        model = MFQ(
            nb_agent=agent_count,
            a_dim=action_dim,
            s_dim=state_dim,
            s_dim_wave=state_dim - wait_dim,
            s_dim_wait=wait_dim,
            config=cfg['MODEL_CONFIG'],
        )
    elif agent_kind == 'dqn':
        state_dim = env.n_s_ls[0]
        wait_dim = env.n_w_ls[0]
        # doubleQ=False selects plain DQN; otherwise double DQN.
        model = DQN(
            nb_agent=len(env.n_s_ls),
            a_dim=env.n_a_ls[0],
            s_dim=state_dim,
            s_dim_wave=state_dim - wait_dim,
            s_dim_wait=wait_dim,
            config=cfg['MODEL_CONFIG'],
            doubleQ=False,
        )
    elif agent_kind == 'ddpg':
        state_dim = env.n_s_ls[0]
        wait_dim = env.n_w_ls[0]
        model = DDPGEN(
            nb_agent=len(env.n_s_ls),
            share_params=True,
            a_dim=env.n_a_ls[0],
            s_dim=state_dim,
            s_dim_wave=state_dim - wait_dim,
            s_dim_wait=wait_dim,
        )
    else:
        # Both IQL flavours share every argument except the model type.
        # NOTE(review): seed is fixed to 0 here rather than taken from the
        # config -- confirm this is intended.
        iql_type = 'dqn' if agent_kind == 'iqld' else 'lr'
        model = IQL(env.n_s_ls, env.n_a_ls, env.n_w_ls, total_step,
                    cfg['MODEL_CONFIG'], seed=0, model_type=iql_type)

    summary_writer = tf.summary.FileWriter(out_dirs['log'])
    trainer = Trainer(
        env,
        model,
        global_counter,
        summary_writer,
        in_test,
        output_path=out_dirs['data'],
    )
    trainer.run()

    # Persist the model at whatever step the counter stopped on.
    final_step = global_counter.cur_step
    logging.info('Training: save final model at step %d ...' % final_step)
    model.save(out_dirs['model'], final_step)