Exemple #1
0
def main(args):
    """Run self-play training of ``args.algo`` on the MAgent battle map.

    Creates a ``'battle'`` ``GridWorld``, spawns one model per side (both
    using ``args.algo``) in a single TensorFlow session, and drives
    ``args.n_round`` rounds through ``tools.Runner`` with ``train=True``.

    Args:
        args: Parsed command-line options. This function reads
            ``map_size``, ``algo``, ``max_steps``, ``save_every``,
            ``render`` and ``n_round``.
    """
    # Initialize the environment
    env = magent.GridWorld('battle', map_size=args.map_size)
    env.set_render_dir(
        os.path.join(BASE_DIR, 'examples/battle_model', 'build/render'))
    handles = env.get_handles()

    # Let TF fall back to another device when needed and grow GPU memory
    # on demand instead of reserving it all up front.
    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)
    tf_config.gpu_options.allow_growth = True

    # The log directory is shared by every algorithm; the per-algorithm
    # part is passed to the runner separately via ``log_name``.  (The
    # original called ``.format(args.algo)`` on this literal, which has no
    # replacement field and was therefore a no-op.)
    log_dir = os.path.join(BASE_DIR, 'data/tmp')
    model_dir = os.path.join(BASE_DIR, 'data/models/{}'.format(args.algo))

    # First round index; the loop below runs rounds
    # [start_from, start_from + n_round).
    start_from = 0

    # NOTE(review): the session is not closed anywhere in this function.
    sess = tf.Session(config=tf_config)
    models = [
        spawn_ai(args.algo, sess, env, handles[0], args.algo + '-me',
                 args.max_steps),
        spawn_ai(args.algo, sess, env, handles[1], args.algo + '-opponent',
                 args.max_steps)
    ]
    # Variables must be initialized after both models have been built.
    sess.run(tf.global_variables_initializer())
    runner = tools.Runner(sess,
                          env,
                          handles,
                          args.map_size,
                          args.max_steps,
                          models,
                          play,
                          # A render interval of 0 is passed when rendering
                          # was not requested.
                          render_every=args.save_every if args.render else 0,
                          save_every=args.save_every,
                          tau=0.01,
                          log_name=args.algo,
                          log_dir=log_dir,
                          model_dir=model_dir,
                          train=True)

    for k in range(start_from, start_from + args.n_round):
        # ``eps`` is taken from ``linear_decay`` with breakpoints at 0, 80%
        # and 100% of ``n_round`` and values 1, 0.2 and 0.1 (presumably the
        # exploration rate -- confirm against ``linear_decay``/``Runner``).
        eps = linear_decay(k, [0, int(args.n_round * 0.8), args.n_round],
                           [1, 0.2, 0.1])
        runner.run(eps, k)
Exemple #2
0
        original_main = False
    #
    if args.oppo in ['ac', 'il', 'mfq', 'mfac']:
        original_oppo = True
    else:
        original_oppo = False
    #
    if args.neighbor_nums == -1:
        bias_flag = False
    else:
        bias_flag = True
    sess = tf.Session(config=tf_config)
    #
    if args.algo == args.oppo:
        models = [
            spawn_ai(args.algo, sess, env, handles[0], args.algo + '-me',
                     args.max_steps, args.neighbor_nums, bias_flag),
            spawn_ai(args.oppo, sess, env, handles[1], args.oppo + '-opponent',
                     args.max_steps, args.neighbor_nums, bias_flag)
        ]

    else:
        models = [
            spawn_ai(args.algo, sess, env, handles[0], args.algo + '-me',
                     args.max_steps, args.neighbor_nums, bias_flag),
            spawn_ai(args.oppo, sess, env, handles[1], args.oppo + '-me',
                     args.max_steps, args.neighbor_nums, bias_flag)
        ]

    #
    sess.run(tf.global_variables_initializer())
Exemple #3
0
    env.set_render_dir(
        os.path.join(BASE_DIR, 'examples/battle_model', 'build/render'))
    handles = env.get_handles()

    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)
    tf_config.gpu_options.allow_growth = True

    main_model_dir = os.path.join(BASE_DIR,
                                  'data/models/{}-0'.format(args.algo))
    oppo_model_dir = os.path.join(BASE_DIR,
                                  'data/models/{}-1'.format(args.oppo))

    sess = tf.Session(config=tf_config)
    models = [
        spawn_ai(args.algo, sess, env, handles[0], args.algo + '-me',
                 args.max_steps),
        spawn_ai(args.oppo, sess, env, handles[1], args.oppo + '-opponent',
                 args.max_steps)
    ]
    sess.run(tf.global_variables_initializer())

    models[0].load(main_model_dir, step=args.idx[0])
    models[1].load(oppo_model_dir, step=args.idx[1])

    runner = tools.Runner(sess,
                          env,
                          handles,
                          args.map_size,
                          args.max_steps,
                          models,
                          battle,
Exemple #4
0
    # Initialize the environment
    env = magent.GridWorld('battle', map_size=args.map_size)
    env.set_render_dir(os.path.join(BASE_DIR, 'examples/battle_model', 'build/render'))
    handles = env.get_handles()
    mtmfq_position = args.mtmfqp
    tf_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
    tf_config.gpu_options.allow_growth = True

    main_model_dir = os.path.join(BASE_DIR, 'data/models/{}-0'.format(args.algo))
    oppo_model_dir1 = os.path.join(BASE_DIR, 'data/models/{}-1'.format(args.oppo1))
    oppo_model_dir2 = os.path.join(BASE_DIR, 'data/models/{}-2'.format(args.oppo2))
    oppo_model_dir3 = os.path.join(BASE_DIR, 'data/models/{}-3'.format(args.oppo3))

    sess = tf.Session(config=tf_config)
    models = [spawn_ai(args.algo, sess, env, handles[0], args.algo + '-me', args.max_steps), spawn_ai(args.oppo1, sess, env, handles[1], args.oppo1 + '-opponent1', args.max_steps), spawn_ai(args.oppo2, sess, env, handles[2], args.oppo2 + '-opponent2', args.max_steps), spawn_ai(args.oppo3, sess, env, handles[3], args.oppo3 + '-opponent3', args.max_steps)]
    sess.run(tf.global_variables_initializer())

    models[0].load(main_model_dir, step=args.idx[0])
    models[1].load(oppo_model_dir1, step=args.idx[1])
    models[2].load(oppo_model_dir2, step=args.idx[2])
    models[3].load(oppo_model_dir3, step=args.idx[3])

    runner = tools.Runner(sess, env, handles, args.map_size, args.max_steps, models, battle, mtmfq_position, render_every=0)
    win_cnt = {'main': 0, 'opponent1': 0, 'opponent2': 0, 'opponent3': 0}
    total_rewards = []
    with open('storepoints_multibattle.csv', 'w+') as myfile:
        myfile.write('{0},{1},{2},{3},{4}\n'.format("Game", "Reward 1", "Reward 2","Reward 3", "Reward 4"))
    for k in range(0, args.n_round):
        total_rewards = runner.run(0.0, k, win_cnt=win_cnt)
        with open('storepoints_multibattle.csv', 'a') as myfile:
Exemple #5
0
    model_dir = os.path.join(BASE_DIR, 'data/models/{}'.format(args.algo))

    with open('predator.csv', 'w+') as myfile:
        myfile.write('{0},{1}\n'.format("Episode", "Reward"))

    if args.algo in ['mfq', 'mfac', 'mtmfq']:
        use_mf = True
    else:
        use_mf = False

    start_from = 0
    total_reward = []
    meanerrortotal = []
    sess = tf.Session(config=tf_config)
    models = [
        spawn_ai(args.algo, sess, env, handles[0], args.algo + '-me',
                 args.max_steps),
        spawn_ai(args.algo, sess, env, handles[1], args.algo + '-opponent1',
                 args.max_steps),
        spawn_ai(args.algo, sess, env, handles[2], args.algo + '-opponent2',
                 args.max_steps),
        spawn_ai(args.algo, sess, env, handles[3], args.algo + '-opponent3',
                 args.max_steps)
    ]
    sess.run(tf.global_variables_initializer())
    if args.algo == 'mtmfq':
        runner = tools.Runner(
            sess,
            env,
            handles,
            args.map_size,
            args.max_steps,
Exemple #6
0
    start_from = 0

    sess = tf.Session(config=tf_config)

    main_model_dir = os.path.join(BASE_DIR,
                                  'data/models/{}-0'.format(args.algo))
    oppo_model_dir = os.path.join(BASE_DIR,
                                  'data/models/{}-1'.format(args.algo))
    main_msg_dir = os.path.join(BASE_DIR,
                                'data/models/{}-msg0'.format(args.algo))
    oppo_msg_dir = os.path.join(BASE_DIR,
                                'data/models/{}-msg1'.format(args.algo))

    models = [
        spawn_ai(args.algo, sess, env, handles[0], args.algo + '-me',
                 args.max_steps),
        spawn_ai(args.algo, sess, env, handles[1], args.algo + '-oppo',
                 args.max_steps)
    ]

    if args.usemsg != 'None':
        MsgModels = [
            spawn_ai('msgdqn', sess, env, handles[0], 'msgdqn' + '-me',
                     args.max_steps),
            spawn_ai('msgdqn', sess, env, handles[1], 'msgdqn' + '-opponent',
                     args.max_steps)
        ]
    else:
        print('do not use msg models')
        MsgModels = [None, None]
    sess.run(tf.global_variables_initializer())
Exemple #7
0
    log_dir = os.path.join(BASE_DIR,'data/tmp'.format(args.algo))
    model_dir = os.path.join(BASE_DIR, 'data/models/{}'.format(args.algo))

    with open('gather.csv', 'w+') as myfile:
        myfile.write('{0},{1}\n'.format("Episode", "Reward"))
        
    if args.algo in ['mfq', 'mfac', 'mtmfq']:
        use_mf = True
    else:
        use_mf = False


    start_from = 0
    total_reward = []
    sess = tf.Session(config=tf_config)
    models = [spawn_ai(args.algo, sess, env, player_handles[0], args.algo + '-me', args.max_steps), spawn_ai(args.algo, sess, env, player_handles[1], args.algo + '-opponent1', args.max_steps), spawn_ai(args.algo, sess, env, player_handles[2], args.algo + '-opponent2', args.max_steps), spawn_ai(args.algo, sess, env, player_handles[3], args.algo + '-opponent3', args.max_steps)]
    sess.run(tf.global_variables_initializer())
    
    if args.algo == 'mtmfq':
        runner = tools.Runner(sess, env, handles, args.map_size, args.max_steps, models, play2,
                            render_every=args.save_every if args.render else 0, save_every=args.save_every, tau=0.01, log_name=args.algo,
                            log_dir=log_dir, model_dir=model_dir, train=True)
    
    else:
        runner = tools.Runner(sess, env, handles, args.map_size, args.max_steps, models, play,
                            render_every=args.save_every if args.render else 0, save_every=args.save_every, tau=0.01, log_name=args.algo,
                            log_dir=log_dir, model_dir=model_dir, train=True)
        
    for k in range(start_from, start_from + args.n_round):
        eps = linear_decay(k, [0, int(args.n_round * 0.8), args.n_round], [1, 0.2, 0.1])
        total_reward = runner.run(eps, k)