Beispiel #1
0
def train(args):
    """Run a complete training session driven by command-line ``args``.

    The run directories and the log are set up first and the config file is
    copied into the data directory.  The environment, the step counter and
    the agent are then built from the parsed config and handed to a
    ``Trainer``.  Once training returns, the final model is saved and, if
    the test mode asks for a post-training test, an ``Evaluator`` pass is
    run last.

    Args:
        args: object exposing ``base_dir``, ``config_dir`` (the path that is
            passed to ``ConfigParser.read``) and ``test_mode``.
    """
    root_dir = args.base_dir
    run_dirs = init_dir(root_dir)
    init_log(run_dirs['log'])

    # keep a copy of the config next to the training data, then parse it
    config_path = args.config_dir
    copy_file(config_path, run_dirs['data'])
    config = configparser.ConfigParser()
    config.read(config_path)
    in_test, post_test = init_test_flag(args.test_mode)

    # environment
    env = init_env(config['ENV_CONFIG'])
    env_dims = (env.n_a, env.n_agent)
    logging.info('Training: a dim %d, agent dim: %d' % env_dims)

    # step counter: each setting is parsed as a float and truncated to int
    # (presumably so the config may use scientific notation -- TODO confirm)
    total_step, test_step, log_step = [
        int(config.getfloat('TRAIN_CONFIG', option))
        for option in ('total_step', 'test_interval', 'log_interval')
    ]
    global_counter = Counter(total_step, test_step, log_step)

    # centralized or multi-agent model
    seed = config.getint('ENV_CONFIG', 'seed')
    model = init_agent(env, config['MODEL_CONFIG'], total_step, seed)

    # training runs in this thread only: multi-threading is disabled for a
    # safe SUMO implementation
    summary_writer = tf.summary.FileWriter(run_dirs['log'])
    Trainer(env,
            model,
            global_counter,
            summary_writer,
            in_test,
            output_path=run_dirs['data']).run()

    # persist the final model together with the step it was reached at
    final_step = global_counter.cur_step
    logging.info('Training: save final model at step %d ...' % final_step)
    model.save(run_dirs['model'], final_step)

    if not post_test:
        return

    # post-training test, written to its own 'eva_data' directory
    eval_dirs = init_dir(root_dir, pathes=['eva_data'])
    Evaluator(env, model, eval_dirs['eva_data']).run()
Beispiel #2
0
def train(args):
    """Train an IDDPG or MADDPG agent as described by ``args``.

    Creates the run directories and the log, copies the config file into the
    data directory, builds the environment, the step counter and the model
    selected by ``env.agent``, runs the trainer and finally saves the model.

    Args:
        args: object exposing ``base_dir``, ``config_dir`` (the path that is
            passed to ``ConfigParser.read``) and ``test_mode``.

    Raises:
        ValueError: if ``env.agent`` is neither ``'iddpg'`` nor ``'maddpg'``.
    """
    base_dir = args.base_dir
    dirs = init_dir(base_dir)
    init_log(dirs['log'])
    config_dir = args.config_dir
    copy_file(config_dir, dirs['data'])
    config = configparser.ConfigParser()
    config.read(config_dir)
    # post_test is currently unused: the offline test step is disabled below.
    in_test, post_test = init_test_flag(args.test_mode)

    # init env
    env = init_env(config['ENV_CONFIG'])
    logging.info('Training: s dim: %d, a dim %d, s dim ls: %r, a dim ls: %r',
                 env.n_s, env.n_a, env.n_s_ls, env.n_a_ls)

    # init step counter
    total_step = int(config.getfloat('TRAIN_CONFIG', 'total_step'))
    test_step = int(config.getfloat('TRAIN_CONFIG', 'test_interval'))
    log_step = int(config.getfloat('TRAIN_CONFIG', 'log_interval'))
    global_counter = Counter(total_step, test_step, log_step)

    # init centralized or multi agent
    seed = config.getint('ENV_CONFIG', 'seed')
    if env.agent == 'iddpg':
        model = IDDPG(env.n_s_ls, env.n_a_ls, env.n_w_ls, total_step,
                      config['MODEL_CONFIG'], seed=seed)
    elif env.agent == 'maddpg':  # TODO: Add MADDPG
        model = MADDPG(env.n_s_ls, env.n_a_ls, env.n_w_ls, env.n_f_ls,
                       total_step, config['MODEL_CONFIG'], seed=seed)
    else:
        # Fail here with a clear message; otherwise `model` would be unbound
        # and the Trainer construction below would raise UnboundLocalError.
        raise ValueError(
            "Training: unsupported agent %r; expected 'iddpg' or 'maddpg'"
            % (env.agent,))

    summary_writer = tf.summary.FileWriter(dirs['log'])
    trainer = Trainer(env, model, global_counter, summary_writer, in_test,
                      output_path=dirs['data'])
    trainer.run()

    # TODO: post-training test is disabled; the intended code was
    #   if post_test:
    #       tester = Tester(env, model, global_counter, summary_writer,
    #                       dirs['data'])
    #       tester.run_offline(dirs['data'])

    # save model
    final_step = global_counter.cur_step
    logging.info('Training: save final model at step %d ...', final_step)
    model.save(dirs['model'], final_step)
Beispiel #3
0
def train(args):
    """Train an IA2C, MA2C or IQL agent as described by ``args``.

    Sets up the run directories and the log, copies the config file into the
    data directory, builds the environment, the step counter and the model
    chosen by ``env.agent``, and runs the trainer.  When the test mode asks
    for a post-training test, an offline ``Tester`` run follows.  The final
    model is saved last.

    Args:
        args: object exposing ``base_dir``, ``config_dir`` (the path that is
            passed to ``ConfigParser.read``) and ``test_mode``.
    """
    out_dirs = init_dir(args.base_dir)
    init_log(out_dirs['log'])

    # keep a copy of the config next to the training data, then parse it
    cfg_path = args.config_dir
    copy_file(cfg_path, out_dirs['data'])
    config = configparser.ConfigParser()
    config.read(cfg_path)
    in_test, post_test = init_test_flag(args.test_mode)

    # environment
    env = init_env(config['ENV_CONFIG'])
    env_dims = (env.n_s, env.n_a, env.n_s_ls, env.n_a_ls)
    logging.info('Training: s dim: %d, a dim %d, s dim ls: %r, a dim ls: %r' %
                 env_dims)

    # step counter: each setting is parsed as a float and truncated to int
    total_step, test_step, log_step = [
        int(config.getfloat('TRAIN_CONFIG', option))
        for option in ('total_step', 'test_interval', 'log_interval')
    ]
    global_counter = Counter(total_step, test_step, log_step)

    # model selection (a centralized 'a2c' option is not wired in)
    seed = config.getint('ENV_CONFIG', 'seed')
    agent_name = env.agent
    if agent_name == 'ia2c':
        model = IA2C(env.n_s_ls, env.n_a_ls, env.n_w_ls, total_step,
                     config['MODEL_CONFIG'], seed=seed)
    elif agent_name == 'ma2c':
        model = MA2C(env.n_s_ls, env.n_a_ls, env.n_w_ls, env.n_f_ls,
                     total_step, config['MODEL_CONFIG'], seed=seed)
    else:
        # Every other agent name gets an IQL model: 'iqld' selects the 'dqn'
        # model type, anything else the 'lr' type.
        # NOTE(review): IQL is built with a fixed seed of 0, not the
        # configured seed -- confirm this is intended.
        iql_type = 'dqn' if agent_name == 'iqld' else 'lr'
        model = IQL(env.n_s_ls, env.n_a_ls, env.n_w_ls, total_step,
                    config['MODEL_CONFIG'], seed=0, model_type=iql_type)

    # Training runs in this thread only: multi-threading (a coordinator with
    # separate train/online-test threads) is disabled for a safe SUMO
    # implementation.
    summary_writer = tf.summary.FileWriter(out_dirs['log'])
    Trainer(env,
            model,
            global_counter,
            summary_writer,
            in_test,
            output_path=out_dirs['data']).run()

    # post-training test, reusing the training env and summary writer
    if post_test:
        Tester(env, model, global_counter, summary_writer,
               out_dirs['data']).run_offline(out_dirs['data'])

    # persist the final model together with the step it was reached at
    final_step = global_counter.cur_step
    logging.info('Training: save final model at step %d ...' % final_step)
    model.save(out_dirs['model'], final_step)
Beispiel #4
0
def train(args):
    """Train the agent selected by ``env.agent`` as described by ``args``.

    Sets up the run directories and the log, copies the config file into the
    data directory, builds the environment and the step counter, constructs
    one of IA2C, MA2C, MFQ ('codql'), DQN, DDPGEN ('ddpg') or IQL, runs the
    trainer and saves the final model.

    Args:
        args: object exposing ``base_dir``, ``config_dir`` (the path that is
            passed to ``ConfigParser.read``) and ``test_mode``.
    """
    out_dirs = init_dir(args.base_dir)
    init_log(out_dirs['log'])

    # keep a copy of the config next to the training data, then parse it
    cfg_path = args.config_dir
    copy_file(cfg_path, out_dirs['data'])
    config = configparser.ConfigParser()
    config.read(cfg_path)
    # the post-test flag is returned as well but not used by this variant
    in_test, _post_test = init_test_flag(args.test_mode)

    # environment
    env = init_env(config['ENV_CONFIG'])
    env_dims = (env.n_s, env.n_a, env.n_s_ls, env.n_a_ls)
    logging.info('Training: s dim: %d, a dim %d, s dim ls: %r, a dim ls: %r' %
                 env_dims)

    # step counter: each setting is parsed as a float and truncated to int
    # (the original author's notes give 1e6 / 2e4 / 1e4 as example values)
    total_step, test_step, log_step = [
        int(config.getfloat('TRAIN_CONFIG', option))
        for option in ('total_step', 'test_interval', 'log_interval')
    ]
    global_counter = Counter(total_step, test_step, log_step)

    # model selection
    seed = config.getint('ENV_CONFIG', 'seed')
    agent_name = env.agent
    if agent_name == 'ia2c':
        model = IA2C(env.n_s_ls, env.n_a_ls, env.n_w_ls, total_step,
                     config['MODEL_CONFIG'], seed=seed)
    elif agent_name == 'ma2c':
        model = MA2C(env.n_s_ls, env.n_a_ls, env.n_w_ls, env.n_f_ls,
                     total_step, config['MODEL_CONFIG'], seed=seed)
    elif agent_name == 'codql':
        # Debug output is interleaved with the lookups on purpose, so the
        # values read so far are still printed if a later lookup fails.
        print('This is codql')
        agent_count = len(env.n_s_ls)
        print('num_agents:', agent_count)
        # NOTE(review): the original author was unsure whether n_a_ls[0] is
        # an action dimension or an action count -- confirm.
        action_dim = env.n_a_ls[0]
        print('a_dim:', action_dim)
        state_dim = env.n_s_ls[0]
        print('env.n_s_ls=', state_dim)
        wait_dim = env.n_w_ls[0]
        print('s_dim_wait:', wait_dim)
        model = MFQ(nb_agent=agent_count,
                    a_dim=action_dim,
                    s_dim=state_dim,
                    s_dim_wave=state_dim - wait_dim,
                    s_dim_wait=wait_dim,
                    config=config['MODEL_CONFIG'])
    elif agent_name == 'dqn':
        agent_count = len(env.n_s_ls)
        action_dim = env.n_a_ls[0]
        state_dim = env.n_s_ls[0]
        wait_dim = env.n_w_ls[0]
        # doubleQ=False denotes dqn, otherwise ddqn
        model = DQN(nb_agent=agent_count,
                    a_dim=action_dim,
                    s_dim=state_dim,
                    s_dim_wave=state_dim - wait_dim,
                    s_dim_wait=wait_dim,
                    config=config['MODEL_CONFIG'],
                    doubleQ=False)
    elif agent_name == 'ddpg':
        agent_count = len(env.n_s_ls)
        action_dim = env.n_a_ls[0]
        state_dim = env.n_s_ls[0]
        wait_dim = env.n_w_ls[0]
        model = DDPGEN(nb_agent=agent_count,
                       share_params=True,
                       a_dim=action_dim,
                       s_dim=state_dim,
                       s_dim_wave=state_dim - wait_dim,
                       s_dim_wait=wait_dim)
    else:
        # Every other agent name gets an IQL model: 'iqld' selects the 'dqn'
        # model type, anything else the 'lr' type.
        # NOTE(review): IQL is built with a fixed seed of 0, not the
        # configured seed -- confirm this is intended.
        iql_type = 'dqn' if agent_name == 'iqld' else 'lr'
        model = IQL(env.n_s_ls, env.n_a_ls, env.n_w_ls, total_step,
                    config['MODEL_CONFIG'], seed=0, model_type=iql_type)

    summary_writer = tf.summary.FileWriter(out_dirs['log'])
    Trainer(env,
            model,
            global_counter,
            summary_writer,
            in_test,
            output_path=out_dirs['data']).run()

    # persist the final model together with the step it was reached at
    final_step = global_counter.cur_step
    logging.info('Training: save final model at step %d ...' % final_step)
    model.save(out_dirs['model'], final_step)