def evaluate_fn(agent_dir, output_dir, seeds, port, demo, policy_type):
    agent = agent_dir.split('/')[-1]
    if not check_dir(agent_dir):
        logging.error('Evaluation: %s does not exist!' % agent)
        return
    # load config file for env
    config_dir = find_file(agent_dir + '/data/')
    if not config_dir:
        return
    config = configparser.ConfigParser()
    config.read(config_dir)
    # init env
    env, greedy_policy = init_env(config['ENV_CONFIG'], port=port, naive_policy=True)
    logging.info('Evaluation: s dim: %d, a dim %d, s dim ls: %r, a dim ls: %r' %
                 (env.n_s, env.n_a, env.n_s_ls, env.n_a_ls))
    env.init_test_seeds(seeds)
    # load model for agent
    if agent != 'greedy':
        # init centralized or multi agent
        if agent == 'a2c':
            model = A2C(env.n_s, env.n_a, 0, config['MODEL_CONFIG'])
        elif agent == 'ia2c':
            model = IA2C(env.n_s_ls, env.n_a_ls, env.n_w_ls, 0, config['MODEL_CONFIG'])
        elif agent == 'ma2c':
            model = MA2C(env.n_s_ls, env.n_a_ls, env.n_w_ls, env.n_f_ls, 0, config['MODEL_CONFIG'])
        elif agent == 'iqld':
            model = IQL(env.n_s_ls, env.n_a_ls, env.n_w_ls, 0, config['MODEL_CONFIG'],
                        seed=0, model_type='dqn')
        else:
            model = IQL(env.n_s_ls, env.n_a_ls, env.n_w_ls, 0, config['MODEL_CONFIG'],
                        seed=0, model_type='lr')
        if not model.load(agent_dir + '/model/'):
            return
    else:
        model = greedy_policy
    env.agent = agent
    # collect evaluation data
    evaluator = Evaluator(env, model, output_dir, demo=demo, policy_type=policy_type)
    evaluator.run()
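# --- Usage sketch (assumption, not part of the original file) ---
# evaluate_fn above takes the run directory of a single trained agent; a driver
# like the one below could fan it out over several runs. The directory layout
# (<base>/<agent>/data, <base>/<agent>/model), the agent names, the port, and the
# policy_type value are hypothetical and only mirror how evaluate_fn reads them.
def _example_evaluate_all(base_dir, output_dir, seeds=(0, 1, 2)):
    for agent_name in ('ia2c', 'ma2c', 'iqld', 'greedy'):
        evaluate_fn(base_dir + '/' + agent_name, output_dir, list(seeds),
                    port=0, demo=False, policy_type='default')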
def train_fn(args):
    base_dir = args.base_dir
    dirs = init_dir(base_dir)
    init_log(dirs['log'])
    config_dir = args.config_dir
    # copy_file(config_dir, dirs['data'])
    config = configparser.ConfigParser()
    config.read(config_dir)
    # test during training, test after training
    in_test, post_test = init_test_flag(args.test_mode)

    # Initialize environment
    print("Initializing environment")
    # env = FordEnv(config['ENV_CONFIG'])
    env = gym.make("CartPole-v0")
    n_s = env.observation_space.shape
    logging.info('Training: s dim: %d, a dim %d' % (n_s[0], env.action_space.n))
    n_s_ls = [n_s[0]]
    n_a_ls = [env.action_space.n]

    # init step counter
    total_step = int(config.getfloat('TRAIN_CONFIG', 'total_step'))
    test_step = int(config.getfloat('TRAIN_CONFIG', 'test_interval'))
    log_step = int(config.getfloat('TRAIN_CONFIG', 'log_interval'))
    global_counter = Counter(total_step, test_step, log_step)

    seed = config.getint('ENV_CONFIG', 'seed')
    model = IQL(n_s_ls, n_a_ls, total_step, config['MODEL_CONFIG'],
                seed=0, model_type='dqn')
    summary_writer = tf.summary.FileWriter(dirs['log'])
    trainer = Trainer(env, model, global_counter, summary_writer, in_test,
                      output_path=dirs['data'])
    trainer.run()

    # post-training test
    if post_test:
        tester = Tester(env, model, global_counter, summary_writer, dirs['data'])
        tester.run_offline(dirs['data'])

    # save model
    final_step = global_counter.cur_step
    logging.info('Training: save final model at step %d ...' % final_step)
    model.save(dirs['model'], final_step)
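# --- Usage sketch (assumption) ---
# train_fn only touches args.base_dir, args.config_dir, and args.test_mode, so a
# minimal argparse driver like this one would be enough to invoke it; the flag
# names and defaults below are assumptions, not the repository's actual CLI.
def _example_train_main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--base-dir', dest='base_dir', required=True)
    parser.add_argument('--config-dir', dest='config_dir', required=True)
    parser.add_argument('--test-mode', dest='test_mode', default='no_test')
    train_fn(parser.parse_args())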
def train(args):
    base_dir = args.base_dir
    dirs = init_dir(base_dir)
    init_log(dirs['log'])
    config_dir = args.config_dir
    copy_file(config_dir, dirs['data'])
    config = configparser.ConfigParser()
    config.read(config_dir)
    in_test, post_test = init_test_flag(args.test_mode)

    # init env
    env = init_env(config['ENV_CONFIG'])
    logging.info('Training: s dim: %d, a dim %d, s dim ls: %r, a dim ls: %r' %
                 (env.n_s, env.n_a, env.n_s_ls, env.n_a_ls))

    # init step counter
    total_step = int(config.getfloat('TRAIN_CONFIG', 'total_step'))
    test_step = int(config.getfloat('TRAIN_CONFIG', 'test_interval'))
    log_step = int(config.getfloat('TRAIN_CONFIG', 'log_interval'))
    global_counter = Counter(total_step, test_step, log_step)

    # init centralized or multi agent
    seed = config.getint('ENV_CONFIG', 'seed')
    # coord = tf.train.Coordinator()
    # if env.agent == 'a2c':
    #     model = A2C(env.n_s, env.n_a, total_step,
    #                 config['MODEL_CONFIG'], seed=seed)
    if env.agent == 'ia2c':
        model = IA2C(env.n_s_ls, env.n_a_ls, env.n_w_ls, total_step,
                     config['MODEL_CONFIG'], seed=seed)
    elif env.agent == 'ma2c':
        model = MA2C(env.n_s_ls, env.n_a_ls, env.n_w_ls, env.n_f_ls, total_step,
                     config['MODEL_CONFIG'], seed=seed)
    elif env.agent == 'iqld':
        model = IQL(env.n_s_ls, env.n_a_ls, env.n_w_ls, total_step,
                    config['MODEL_CONFIG'], seed=0, model_type='dqn')
    else:
        model = IQL(env.n_s_ls, env.n_a_ls, env.n_w_ls, total_step,
                    config['MODEL_CONFIG'], seed=0, model_type='lr')

    # disable multi-threading for safe SUMO implementation
    # threads = []
    summary_writer = tf.summary.FileWriter(dirs['log'])
    trainer = Trainer(env, model, global_counter, summary_writer, in_test,
                      output_path=dirs['data'])
    trainer.run()
    # if in_test or post_test:
    #     # assign a different port for test env
    #     test_env = init_env(config['ENV_CONFIG'], port=1)
    #     tester = Tester(test_env, model, global_counter, summary_writer, dirs['data'])
    # def train_fn():
    #     trainer.run(coord)
    # thread = threading.Thread(target=train_fn)
    # thread.start()
    # threads.append(thread)
    # if in_test:
    #     def test_fn():
    #         tester.run_online(coord)
    #     thread = threading.Thread(target=test_fn)
    #     thread.start()
    #     threads.append(thread)
    # coord.join(threads)

    # post-training test
    if post_test:
        tester = Tester(env, model, global_counter, summary_writer, dirs['data'])
        tester.run_offline(dirs['data'])

    # save model
    final_step = global_counter.cur_step
    logging.info('Training: save final model at step %d ...' % final_step)
    model.save(dirs['model'], final_step)
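# --- Assumed behavior of init_test_flag (sketch, not the repository's code) ---
# The only constraint visible above is that init_test_flag(test_mode) returns the
# pair (in_test, post_test); one mapping consistent with that call site:
def _init_test_flag_sketch(test_mode):
    if test_mode == 'no_test':
        return False, False
    if test_mode == 'in_train_test':
        return True, False
    if test_mode == 'after_train_test':
        return False, True
    return True, True  # e.g. a combined 'all_test' mode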
def train(args):
    base_dir = args.base_dir
    dirs = init_dir(base_dir)
    init_log(dirs['log'])
    config_dir = args.config_dir
    copy_file(config_dir, dirs['data'])
    config = configparser.ConfigParser()
    config.read(config_dir)
    in_test, post_test = init_test_flag(args.test_mode)

    # init env
    env = init_env(config['ENV_CONFIG'])
    logging.info('Training: s dim: %d, a dim %d, s dim ls: %r, a dim ls: %r' %
                 (env.n_s, env.n_a, env.n_s_ls, env.n_a_ls))

    # init step counter
    total_step = int(config.getfloat('TRAIN_CONFIG', 'total_step'))    # 1e6
    test_step = int(config.getfloat('TRAIN_CONFIG', 'test_interval'))  # 2e4
    log_step = int(config.getfloat('TRAIN_CONFIG', 'log_interval'))    # 1e4
    global_counter = Counter(total_step, test_step, log_step)

    # init centralized or multi agent
    seed = config.getint('ENV_CONFIG', 'seed')  # 12
    # coord = tf.train.Coordinator()
    if env.agent == 'ia2c':
        model = IA2C(env.n_s_ls, env.n_a_ls, env.n_w_ls, total_step,
                     config['MODEL_CONFIG'], seed=seed)
    elif env.agent == 'ma2c':
        model = MA2C(env.n_s_ls, env.n_a_ls, env.n_w_ls, env.n_f_ls, total_step,
                     config['MODEL_CONFIG'], seed=seed)
    elif env.agent == 'codql':
        print('This is codql')
        num_agents = len(env.n_s_ls)
        print('num_agents:', num_agents)
        a_dim = env.n_a_ls[0]       # per-agent action-space size
        print('a_dim:', a_dim)
        s_dim = env.n_s_ls[0]       # per-agent state dimension
        print('env.n_s_ls=', s_dim)
        s_dim_wait = env.n_w_ls[0]  # per-agent wait-state dimension
        print('s_dim_wait:', s_dim_wait)
        model = MFQ(nb_agent=num_agents, a_dim=a_dim, s_dim=s_dim,
                    s_dim_wave=s_dim - s_dim_wait, s_dim_wait=s_dim_wait,
                    config=config['MODEL_CONFIG'])
    elif env.agent == 'dqn':
        # doubleQ=False selects vanilla DQN; doubleQ=True selects double DQN
        model = DQN(nb_agent=len(env.n_s_ls), a_dim=env.n_a_ls[0], s_dim=env.n_s_ls[0],
                    s_dim_wave=env.n_s_ls[0] - env.n_w_ls[0], s_dim_wait=env.n_w_ls[0],
                    config=config['MODEL_CONFIG'], doubleQ=False)
    elif env.agent == 'ddpg':
        model = DDPGEN(nb_agent=len(env.n_s_ls), share_params=True, a_dim=env.n_a_ls[0],
                       s_dim=env.n_s_ls[0], s_dim_wave=env.n_s_ls[0] - env.n_w_ls[0],
                       s_dim_wait=env.n_w_ls[0])
    elif env.agent == 'iqld':
        model = IQL(env.n_s_ls, env.n_a_ls, env.n_w_ls, total_step,
                    config['MODEL_CONFIG'], seed=0, model_type='dqn')
    else:
        model = IQL(env.n_s_ls, env.n_a_ls, env.n_w_ls, total_step,
                    config['MODEL_CONFIG'], seed=0, model_type='lr')

    summary_writer = tf.summary.FileWriter(dirs['log'])
    trainer = Trainer(env, model, global_counter, summary_writer, in_test,
                      output_path=dirs['data'])
    trainer.run()

    # save model
    final_step = global_counter.cur_step
    logging.info('Training: save final model at step %d ...' % final_step)
    model.save(dirs['model'], final_step)
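# --- Example config values (assumption) ---
# The section and key names follow the config.getfloat/getint reads above, and
# the numbers come from the inline comments in train() (total_step 1e6,
# test_interval 2e4, log_interval 1e4, seed 12); any other sections and keys of
# the real config file are omitted here.
_example_train_config = """
[TRAIN_CONFIG]
total_step = 1e6
test_interval = 2e4
log_interval = 1e4

[ENV_CONFIG]
seed = 12
"""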
def evaluate_fn(agent_dir, output_dir, seeds, port, demo):
    agent = agent_dir.split('/')[-1]
    doubleQ = True
    if agent == 'ddqn':
        doubleQ = False
        agent = 'dqn'
    if not check_dir(agent_dir):
        logging.error('Evaluation: %s does not exist!' % agent)
        return
    # load config file for env
    config_dir = find_file(agent_dir + '/data/')
    if not config_dir:
        return
    config = configparser.ConfigParser()
    config.read(config_dir)
    # init env
    env, greedy_policy = init_env(config['ENV_CONFIG'], port=port, naive_policy=True)
    logging.info('Evaluation: s dim: %d, a dim %d, s dim ls: %r, a dim ls: %r' %
                 (env.n_s, env.n_a, env.n_s_ls, env.n_a_ls))
    env.init_test_seeds(seeds)
    # load model for agent
    if agent != 'greedy':
        # init centralized or multi agent
        if agent == 'a2c':
            model = A2C(env.n_s, env.n_a, 0, config['MODEL_CONFIG'])
        elif agent == 'ia2c':
            model = IA2C(env.n_s_ls, env.n_a_ls, env.n_w_ls, 0, config['MODEL_CONFIG'])
        elif agent == 'ma2c':
            model = MA2C(env.n_s_ls, env.n_a_ls, env.n_w_ls, env.n_f_ls, 0, config['MODEL_CONFIG'])
        elif agent == 'codql':
            print('This is codql')
            model = MFQ(nb_agent=len(env.n_s_ls), a_dim=env.n_a_ls[0], s_dim=env.n_s_ls[0],
                        s_dim_wave=env.n_s_ls[0] - env.n_w_ls[0], s_dim_wait=env.n_w_ls[0],
                        config=config['MODEL_CONFIG'])
        elif agent == 'dqn':
            # doubleQ=False denotes dqn, otherwise ddqn
            model = DQN(nb_agent=len(env.n_s_ls), a_dim=env.n_a_ls[0], s_dim=env.n_s_ls[0],
                        s_dim_wave=env.n_s_ls[0] - env.n_w_ls[0], s_dim_wait=env.n_w_ls[0],
                        config=config['MODEL_CONFIG'], doubleQ=doubleQ)
        elif agent == 'ddpg':
            model = DDPGEN(nb_agent=len(env.n_s_ls), share_params=True, a_dim=env.n_a_ls[0],
                           s_dim=env.n_s_ls[0], s_dim_wave=env.n_s_ls[0] - env.n_w_ls[0],
                           s_dim_wait=env.n_w_ls[0])
        elif agent == 'iqld':
            model = IQL(env.n_s_ls, env.n_a_ls, env.n_w_ls, 0, config['MODEL_CONFIG'],
                        seed=0, model_type='dqn')
        else:
            model = IQL(env.n_s_ls, env.n_a_ls, env.n_w_ls, 0, config['MODEL_CONFIG'],
                        seed=0, model_type='lr')
        if not model.load(agent_dir + '/model/'):
            return
    else:
        model = greedy_policy
    env.agent = agent
    # collect evaluation data
    evaluator = Evaluator(env, model, output_dir, demo=demo)
    evaluator.run()
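# --- Path-handling note (sketch) ---
# agent_dir.split('/')[-1] above returns an empty string when agent_dir ends with
# a trailing '/', in which case no agent branch matches; a more robust alternative
# using only the standard library would be:
def _agent_name_from_dir(agent_dir):
    import os
    # normpath drops a trailing separator; basename then returns the last component
    return os.path.basename(os.path.normpath(agent_dir))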