Example #1
def evaluate_fn(agent_dir, output_dir, seeds, port, demo, policy_type):
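    """Load a trained agent from agent_dir and evaluate it on the configured
    environment, writing results to output_dir."""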
    agent = agent_dir.split('/')[-1]
    if not check_dir(agent_dir):
        logging.error('Evaluation: %s does not exist!' % agent)
        return
    # load config file for env
    config_dir = find_file(agent_dir + '/data/')
    if not config_dir:
        return
    config = configparser.ConfigParser()
    config.read(config_dir)

    # init env
    env, greedy_policy = init_env(config['ENV_CONFIG'],
                                  port=port,
                                  naive_policy=True)
    logging.info(
        'Evaluation: s dim: %d, a dim %d, s dim ls: %r, a dim ls: %r' %
        (env.n_s, env.n_a, env.n_s_ls, env.n_a_ls))
    env.init_test_seeds(seeds)

    # load model for agent
    if agent != 'greedy':
        # init centralized or multi agent
        if agent == 'a2c':
            model = A2C(env.n_s, env.n_a, 0, config['MODEL_CONFIG'])
        elif agent == 'ia2c':
            model = IA2C(env.n_s_ls, env.n_a_ls, env.n_w_ls, 0,
                         config['MODEL_CONFIG'])
        elif agent == 'ma2c':
            model = MA2C(env.n_s_ls, env.n_a_ls, env.n_w_ls, env.n_f_ls, 0,
                         config['MODEL_CONFIG'])
        elif agent == 'iqld':
            model = IQL(env.n_s_ls,
                        env.n_a_ls,
                        env.n_w_ls,
                        0,
                        config['MODEL_CONFIG'],
                        seed=0,
                        model_type='dqn')
        else:
            model = IQL(env.n_s_ls,
                        env.n_a_ls,
                        env.n_w_ls,
                        0,
                        config['MODEL_CONFIG'],
                        seed=0,
                        model_type='lr')
        if not model.load(agent_dir + '/model/'):
            return
    else:
        model = greedy_policy
    env.agent = agent
    # collect evaluation data
    evaluator = Evaluator(env,
                          model,
                          output_dir,
                          demo=demo,
                          policy_type=policy_type)
    evaluator.run()
Example #2
def train_fn(args):
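    """Train an IQL (DQN) agent on CartPole-v0 using the settings read from
    args.config_dir, optionally test afterwards, and save the final model."""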
    base_dir = args.base_dir
    dirs = init_dir(base_dir)
    init_log(dirs['log'])
    config_dir = args.config_dir
    # copy_file(config_dir, dirs['data'])
    config = configparser.ConfigParser()
    config.read(config_dir)

    # test during training, test after training
    in_test, post_test = init_test_flag(args.test_mode)

    # Initialize environment
    print("Initializing environment")
    # env = FordEnv(config['ENV_CONFIG'])
    env = gym.make("CartPole-v0")
    n_s = env.observation_space.shape
    logging.info('Training: s dim: %d, a dim %d' %
                 (n_s[0], env.action_space.n))
    n_s_ls = [n_s[0]]
    n_a_ls = [env.action_space.n]
    # init step counter
    total_step = int(config.getfloat('TRAIN_CONFIG', 'total_step'))
    test_step = int(config.getfloat('TRAIN_CONFIG', 'test_interval'))
    log_step = int(config.getfloat('TRAIN_CONFIG', 'log_interval'))
    global_counter = Counter(total_step, test_step, log_step)

    seed = config.getint('ENV_CONFIG', 'seed')  # read from config; the IQL model below is seeded with 0

    model = IQL(n_s_ls,
                n_a_ls,
                total_step,
                config['MODEL_CONFIG'],
                seed=0,
                model_type='dqn')

    summary_writer = tf.summary.FileWriter(dirs['log'])
    trainer = Trainer(env,
                      model,
                      global_counter,
                      summary_writer,
                      in_test,
                      output_path=dirs['data'])
    trainer.run()

    # post-training test
    if post_test:
        tester = Tester(env, model, global_counter, summary_writer,
                        dirs['data'])
        tester.run_offline(dirs['data'])

    # save model
    final_step = global_counter.cur_step
    logging.info('Training: save final model at step %d ...' % final_step)
    model.save(dirs['model'], final_step)
Example #3
def train(args):
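    """Train a centralized or multi-agent model (IA2C, MA2C, or IQL) on the
    configured environment, optionally test afterwards, and save the final model."""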
    base_dir = args.base_dir
    dirs = init_dir(base_dir)
    init_log(dirs['log'])
    config_dir = args.config_dir
    copy_file(config_dir, dirs['data'])
    config = configparser.ConfigParser()
    config.read(config_dir)
    in_test, post_test = init_test_flag(args.test_mode)

    # init env
    env = init_env(config['ENV_CONFIG'])
    logging.info('Training: s dim: %d, a dim %d, s dim ls: %r, a dim ls: %r' %
                 (env.n_s, env.n_a, env.n_s_ls, env.n_a_ls))

    # init step counter
    total_step = int(config.getfloat('TRAIN_CONFIG', 'total_step'))
    test_step = int(config.getfloat('TRAIN_CONFIG', 'test_interval'))
    log_step = int(config.getfloat('TRAIN_CONFIG', 'log_interval'))
    global_counter = Counter(total_step, test_step, log_step)

    # init centralized or multi agent
    seed = config.getint('ENV_CONFIG', 'seed')
    # coord = tf.train.Coordinator()

    # if env.agent == 'a2c':
    #     model = A2C(env.n_s, env.n_a, total_step,
    #                 config['MODEL_CONFIG'], seed=seed)
    if env.agent == 'ia2c':
        model = IA2C(env.n_s_ls,
                     env.n_a_ls,
                     env.n_w_ls,
                     total_step,
                     config['MODEL_CONFIG'],
                     seed=seed)
    elif env.agent == 'ma2c':
        model = MA2C(env.n_s_ls,
                     env.n_a_ls,
                     env.n_w_ls,
                     env.n_f_ls,
                     total_step,
                     config['MODEL_CONFIG'],
                     seed=seed)
    elif env.agent == 'iqld':
        model = IQL(env.n_s_ls,
                    env.n_a_ls,
                    env.n_w_ls,
                    total_step,
                    config['MODEL_CONFIG'],
                    seed=0,
                    model_type='dqn')
    else:
        model = IQL(env.n_s_ls,
                    env.n_a_ls,
                    env.n_w_ls,
                    total_step,
                    config['MODEL_CONFIG'],
                    seed=0,
                    model_type='lr')

    # disable multi-threading for safe SUMO implementation
    # threads = []
    summary_writer = tf.summary.FileWriter(dirs['log'])
    trainer = Trainer(env,
                      model,
                      global_counter,
                      summary_writer,
                      in_test,
                      output_path=dirs['data'])
    trainer.run()
    # if in_test or post_test:
    #     # assign a different port for test env
    #     test_env = init_env(config['ENV_CONFIG'], port=1)
    #     tester = Tester(test_env, model, global_counter, summary_writer, dirs['data'])

    # def train_fn():
    #     trainer.run(coord)

    # thread = threading.Thread(target=train_fn)
    # thread.start()
    # threads.append(thread)
    # if in_test:
    #     def test_fn():
    #         tester.run_online(coord)
    #     thread = threading.Thread(target=test_fn)
    #     thread.start()
    #     threads.append(thread)
    # coord.join(threads)

    # post-training test
    if post_test:
        tester = Tester(env, model, global_counter, summary_writer,
                        dirs['data'])
        tester.run_offline(dirs['data'])

    # save model
    final_step = global_counter.cur_step
    logging.info('Training: save final model at step %d ...' % final_step)
    model.save(dirs['model'], final_step)
Example #4
def train(args):
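    """Train one of the supported agents (IA2C, MA2C, MFQ/codql, DQN, DDPG, or
    IQL) on the configured environment and save the final model."""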
    base_dir = args.base_dir
    dirs = init_dir(base_dir)
    init_log(dirs['log'])
    config_dir = args.config_dir
    copy_file(config_dir, dirs['data'])
    config = configparser.ConfigParser()
    config.read(config_dir)
    in_test, post_test = init_test_flag(args.test_mode)

    # init env
    env = init_env(config['ENV_CONFIG'])
    logging.info('Training: s dim: %d, a dim %d, s dim ls: %r, a dim ls: %r' %
                 (env.n_s, env.n_a, env.n_s_ls, env.n_a_ls))

    # init step counter
    total_step = int(config.getfloat('TRAIN_CONFIG', 'total_step'))  #1e6
    test_step = int(config.getfloat('TRAIN_CONFIG', 'test_interval'))  #2e4
    log_step = int(config.getfloat('TRAIN_CONFIG', 'log_interval'))  #1e4
    global_counter = Counter(total_step, test_step, log_step)

    # init centralized or multi agent
    seed = config.getint('ENV_CONFIG', 'seed')  #12
    # coord = tf.train.Coordinator()

    if env.agent == 'ia2c':
        model = IA2C(env.n_s_ls,
                     env.n_a_ls,
                     env.n_w_ls,
                     total_step,
                     config['MODEL_CONFIG'],
                     seed=seed)
    elif env.agent == 'ma2c':
        model = MA2C(env.n_s_ls,
                     env.n_a_ls,
                     env.n_w_ls,
                     env.n_f_ls,
                     total_step,
                     config['MODEL_CONFIG'],
                     seed=seed)
    elif env.agent == 'codql':
        print('This is codql')
        num_agents = len(env.n_s_ls)
        print('num_agents:', num_agents)
        a_dim = env.n_a_ls[0]  # action dimension of the first agent (dim or num?)
        print('a_dim:', a_dim)
        s_dim = env.n_s_ls[0]
        print('s_dim:', s_dim)
        s_dim_wait = env.n_w_ls[0]
        print('s_dim_wait:', s_dim_wait)
        # obs_space = s_dim  # state dim error
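        # the waiting-time part is carved out of the full state dim,
        # so the remaining "wave" part is s_dim - s_dim_wait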
        model = MFQ(nb_agent=num_agents,
                    a_dim=a_dim,
                    s_dim=s_dim,
                    s_dim_wave=s_dim - s_dim_wait,
                    s_dim_wait=s_dim_wait,
                    config=config['MODEL_CONFIG'])
    elif env.agent == 'dqn':
        model = DQN(nb_agent=len(env.n_s_ls),
                    a_dim=env.n_a_ls[0],
                    s_dim=env.n_s_ls[0],
                    s_dim_wave=env.n_s_ls[0] - env.n_w_ls[0],
                    s_dim_wait=env.n_w_ls[0],
                    config=config['MODEL_CONFIG'],
                    doubleQ=False)  # doubleQ=False selects plain DQN; doubleQ=True selects double DQN
    elif env.agent == 'ddpg':
        model = DDPGEN(nb_agent=len(env.n_s_ls),
                       share_params=True,
                       a_dim=env.n_a_ls[0],
                       s_dim=env.n_s_ls[0],
                       s_dim_wave=env.n_s_ls[0] - env.n_w_ls[0],
                       s_dim_wait=env.n_w_ls[0])
    elif env.agent == 'iqld':
        model = IQL(env.n_s_ls,
                    env.n_a_ls,
                    env.n_w_ls,
                    total_step,
                    config['MODEL_CONFIG'],
                    seed=0,
                    model_type='dqn')
    else:
        model = IQL(env.n_s_ls,
                    env.n_a_ls,
                    env.n_w_ls,
                    total_step,
                    config['MODEL_CONFIG'],
                    seed=0,
                    model_type='lr')

    summary_writer = tf.summary.FileWriter(dirs['log'])
    trainer = Trainer(env,
                      model,
                      global_counter,
                      summary_writer,
                      in_test,
                      output_path=dirs['data'])
    trainer.run()

    # save model
    final_step = global_counter.cur_step
    logging.info('Training: save final model at step %d ...' % final_step)
    model.save(dirs['model'], final_step)
Example #5
def evaluate_fn(agent_dir, output_dir, seeds, port, demo):
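    """Load a trained agent from agent_dir and evaluate it on the configured
    environment, optionally in demo mode."""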
    agent = agent_dir.split('/')[-1]
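    # a 'ddqn' directory is evaluated with the DQN class; doubleQ selects the variant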
    doubleQ = True
    if agent == 'ddqn':
        doubleQ = False
        agent = 'dqn'
    if not check_dir(agent_dir):
        logging.error('Evaluation: %s does not exist!' % agent)
        return
    # load config file for env
    config_dir = find_file(agent_dir + '/data/')
    if not config_dir:
        return
    config = configparser.ConfigParser()
    config.read(config_dir)

    # init env
    env, greedy_policy = init_env(config['ENV_CONFIG'],
                                  port=port,
                                  naive_policy=True)
    logging.info(
        'Evaluation: s dim: %d, a dim %d, s dim ls: %r, a dim ls: %r' %
        (env.n_s, env.n_a, env.n_s_ls, env.n_a_ls))
    env.init_test_seeds(seeds)

    # load model for agent
    if agent != 'greedy':
        # init centralized or multi agent
        if agent == 'a2c':
            model = A2C(env.n_s, env.n_a, 0, config['MODEL_CONFIG'])
        elif agent == 'ia2c':
            model = IA2C(env.n_s_ls, env.n_a_ls, env.n_w_ls, 0,
                         config['MODEL_CONFIG'])
        elif agent == 'ma2c':
            model = MA2C(env.n_s_ls, env.n_a_ls, env.n_w_ls, env.n_f_ls, 0,
                         config['MODEL_CONFIG'])
        elif agent == 'codql':
            print('This is codql')
            model = MFQ(nb_agent=len(env.n_s_ls),
                        a_dim=env.n_a_ls[0],
                        s_dim=env.n_s_ls[0],
                        s_dim_wave=env.n_s_ls[0] - env.n_w_ls[0],
                        s_dim_wait=env.n_w_ls[0],
                        config=config['MODEL_CONFIG'])
        elif agent == 'dqn':
            model = DQN(nb_agent=len(env.n_s_ls),
                        a_dim=env.n_a_ls[0],
                        s_dim=env.n_s_ls[0],
                        s_dim_wave=env.n_s_ls[0] - env.n_w_ls[0],
                        s_dim_wait=env.n_w_ls[0],
                        config=config['MODEL_CONFIG'],
                        doubleQ=doubleQ)  # doubleQ=False selects plain DQN; doubleQ=True selects double DQN
        elif agent == 'ddpg':
            model = DDPGEN(nb_agent=len(env.n_s_ls),
                           share_params=True,
                           a_dim=env.n_a_ls[0],
                           s_dim=env.n_s_ls[0],
                           s_dim_wave=env.n_s_ls[0] - env.n_w_ls[0],
                           s_dim_wait=env.n_w_ls[0])
        elif agent == 'iqld':
            model = IQL(env.n_s_ls,
                        env.n_a_ls,
                        env.n_w_ls,
                        0,
                        config['MODEL_CONFIG'],
                        seed=0,
                        model_type='dqn')
        else:
            model = IQL(env.n_s_ls,
                        env.n_a_ls,
                        env.n_w_ls,
                        0,
                        config['MODEL_CONFIG'],
                        seed=0,
                        model_type='lr')
        if not model.load(agent_dir + '/model/'):
            return
    else:
        model = greedy_policy
    env.agent = agent
    # collect evaluation data
    evaluator = Evaluator(env, model, output_dir, demo=demo)
    evaluator.run()