예제 #1
0
파일: GUI.py 프로젝트: nimisis/DeDOL
# Build five DQN patrollers and five DQN poachers, each constructed with a
# distinct model-name string ('pa_model0'..'pa_model4', 'po_model0'..).
patrollers = [Patroller_CNN(args, 'pa_model{0}'.format(i)) for i in range(5)]
poachers = [Poacher(args, 'po_model{0}'.format(i)) for i in range(5)]

# Create the TF session with the allow_growth GPU option enabled, then run
# the global variable initializer.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
sess.run(tf.global_variables_initializer())

args.po_location = None
if not args.load:
    ### no trained model requested: show the random sweeping patroller
    ### against the heuristic poacher
    poacher = Poacher_h(args, animal_density)
    patroller = RandomSweepingPatroller(args)
    test_gui(poacher, patroller, sess, args, pa_type='RS', po_type='PARAM')
else:
    ### load the DQN models you have trained
    # Slot 0 of each list is replaced by the heuristic agent; slot 1 receives
    # the weights stored at args.po_load_path / args.pa_load_path.
    poachers[0] = Poacher_h(args, animal_density)
    patrollers[0] = Patroller_h(args, animal_density)

    poachers[1].load(sess, args.po_load_path)
    patrollers[1].load(sess, args.pa_load_path)

    # The GUI run pairs the heuristic poacher (slot 0) with the loaded
    # patroller (slot 1).
    test_gui(
        poachers[0],
        patrollers[1],
        sess,
        args,
        pa_type='DQN',
        po_type='PARAM',
    )
예제 #2
0
# Spellings accepted by _str2bool (compared case-insensitively). The empty
# string stays falsy, which is what ``bool('')`` gave with ``type=bool``.
_TRUE_STRINGS = frozenset(('true', 't', 'yes', 'y', '1'))
_FALSE_STRINGS = frozenset(('false', 'f', 'no', 'n', '0', ''))

# One row per preset flag:
#   (flag name, po_scan_rate, tourist_noise, filter_bleeb, see_surrounding)
# Row order is significant: if several preset flags are set, the first row
# that is set wins. Integer literals (1, 0) are kept as integers so the values
# dumped to train_args.json do not change.
_PRESETS = (
    ('exac_loc_always_no_noise', 1, 0, False, True),
    ('exac_loc_always_with_noise', 1, 0.05, False, True),
    ('blur_loc_always_no_noise', 1, 0, True, True),
    ('blur_loc_always_with_noise', 1, 0.05, True, True),
    ('exac_loc_50_no_noise', 0.5, 0, False, True),
    ('exac_loc_always_no_noise_no_vis', 1, 0, False, False),
    ('exac_loc_always_with_noise_no_vis', 1, 0.05, False, False),
    ('blur_loc_always_no_noise_no_vis', 1, 0, True, False),
    ('blur_loc_always_with_noise_no_vis', 1, 0.05, True, False),
    ('exac_loc_50_no_noise_no_vis', 0.5, 0, False, False),
)


def _str2bool(value):
    """Convert a command-line token into a real boolean.

    ``type=bool`` makes argparse call ``bool('False')``, which is ``True``
    because the string is non-empty. This converter maps the usual textual
    spellings instead.

    Raises:
        argparse.ArgumentTypeError: if *value* is not a recognised spelling.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in _TRUE_STRINGS:
        return True
    if text in _FALSE_STRINGS:
        return False
    raise argparse.ArgumentTypeError(
        'expected a boolean value, got {0!r}'.format(value))


def _apply_preset(args):
    """Overwrite sensor/map settings on *args* from the first enabled preset.

    Returns the name of the preset that was applied, or None if no preset
    flag is set (in which case *args* is left untouched).
    """
    for name, scan_rate, noise, blur, visible in _PRESETS:
        if not getattr(args, name):
            continue
        args.po_bleeb = True
        args.po_scan_rate = scan_rate
        args.tourist_noise = noise
        args.filter_bleeb = blur

        args.see_surrounding = visible
        args.footsteps = False

        # Every preset uses the 'poacher' map on a 7x7 grid with naive=True.
        args.map_type = 'poacher'
        args.naive = True
        args.row_num = 7
        args.column_num = 7
        return name
    return None


def _write_param_log(args):
    """Write the human-readable parameter summary to <save_path>paralog.txt."""
    entries = (
        ('row_num', args.row_num),
        ('snare_num', args.snare_num),
        ('max_time', args.max_time),
        ('animal density seed', args.ani_den_seed),
        ('pa_initial_episode_num', args.pa_episode_num),
        ('po_initial_episode_num', args.po_episode_num),
        ('epi_num_incr', args.epi_num_incr),
        ('final_incr_iter', args.final_incr_iter),
        ('pa_replay_buffer_size', args.pa_replay_buffer_size),
        ('po_replay_buffer_size', args.po_replay_buffer_size),
        ('pa_initial_lr', args.pa_initial_lr),
        ('po_initial_lr', args.po_initial_lr),
        ('test_episode_num', args.test_episode_num),
        ('Delta', args.Delta),
        ('po_location', args.po_location),
        ('map_type', args.map_type),
        # These two labels used to be written with the value of args.naive.
        ('filter_bleeb', args.filter_bleeb),
        ('po_bleeb', args.po_bleeb),
        ('naive', args.naive),
    )
    with open(args.save_path + 'paralog.txt', 'w') as paralog:
        for label, value in entries:
            paralog.write('{0} {1} \n'.format(label, value))


def _run_in_threads(funcs, params):
    """Start one thread per (function, argument list) pair and join them all."""
    threads = []
    for func, func_args in zip(funcs, params):
        worker = Thread(target=func, args=func_args)
        worker.start()
        threads.append(worker)
    # Block the calling thread until every worker has finished.
    for worker in threads:
        worker.join()


def _run_double_oracle(args):
    """Run the DO training loop (presumably double oracle) for *args*.

    Expects the module-level ``log_file`` to be open for writing and
    ``args.save_path`` to be usable as a plain string prefix for output
    files. Stops after the iteration whose number equals ``args.iter_num``.
    """
    ################## for initialization ###########################
    animal_density = generate_map(args)
    env_pa = Env(args,
                 animal_density,
                 cell_length=None,
                 canvas=None,
                 gui=False)
    env_po = Env(args,
                 animal_density,
                 cell_length=None,
                 canvas=None,
                 gui=False)

    # One model slot per DO iteration plus the initial strategy in slot 0.
    patrollers = [
        Patroller_CNN(args, 'pa_model' + str(i))
        for i in range(args.iter_num + 1)
    ]
    poachers = [
        Poacher(args, 'po_model' + str(i)) for i in range(args.iter_num + 1)
    ]
    pa_type = ['DQN']
    po_type = ['DQN']

    ### initialize poachers needed for training a separate best-response poacher DQN
    br_poacher = Poacher(args, 'br_poacher')
    br_target_poacher = Poacher(args, 'br_target_poacher')
    br_good_poacher = Poacher(args, 'br_good_poacher')
    br_utility = np.zeros(2)

    # Outside naive mode, slot 0 holds the rule-based agents instead of DQNs.
    if not args.naive:
        patrollers[0] = RandomSweepingPatroller(args, mode=args.po_location)
        pa_type[0] = 'RS'
        poachers[0] = Poacher_h(args, animal_density)
        po_type[0] = 'PARAM'

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    ### copy ops needed for training a separate best-response poacher DQN
    br_po_copy_ops = tf_copy(br_target_poacher, br_poacher, sess)
    br_po_good_copy_ops = tf_copy(br_good_poacher, br_poacher, sess)

    pa_payoff = np.zeros((1, 1))
    po_payoff = np.zeros((1, 1))
    length = np.zeros((1, 1))

    # Payoff of the two initial strategies against each other.
    pa_payoff[0, 0], po_payoff[0, 0], _ = simulate_payoff(patrollers,
                                                          poachers,
                                                          0,
                                                          0,
                                                          env_pa,
                                                          sess,
                                                          args,
                                                          pa_type=pa_type[0],
                                                          po_type=po_type[0])

    pa_strategy, po_strategy = np.array([1]), np.array([1])

    np.save(file=args.save_path + 'pa_strategy_iter_0', arr=pa_strategy)
    np.save(file=args.save_path + 'po_strategy_iter_0', arr=po_strategy)

    np.save(file=args.save_path + 'pa_payoff_iter_0', arr=pa_payoff)
    np.save(file=args.save_path + 'po_payoff_iter_0', arr=po_payoff)

    log_file.write('pa_payoff:\n' + str(pa_payoff) + '\n')
    log_file.write('po_payoff:\n' + str(po_payoff) + '\n')

    log_file.write('pa_strat:\n' + str(pa_strategy) + '\n')
    log_file.write('po_strat:\n' + str(po_strategy) + '\n')

    ############## starting DO ####################
    iteration = 1
    # Pointers counting the number of strategies for pa and po.
    pa_pointer, po_pointer = 1, 1

    while True:
        time_begin = time.time()

        pa_payoff, po_payoff, length = extend_payoff(pa_payoff, po_payoff,
                                                     length, pa_pointer + 1,
                                                     po_pointer + 1)
        po_type.append('DQN')
        pa_type.append('DQN')

        log_file.flush()

        print('\n' + 'NEW_ITERATION: ' + str(iteration) + '\n')
        log_file.write('\n' + 'NEW_ITERATION: ' + str(iteration) + '\n')

        ### compute the NE utility for both sides
        po_ne_utility = 0
        pa_ne_utility = 0
        for pa_strat in range(pa_pointer):
            for po_strat in range(po_pointer):
                weight = pa_strategy[pa_strat] * po_strategy[po_strat]
                po_ne_utility += weight * po_payoff[pa_strat, po_strat]
                pa_ne_utility += weight * pa_payoff[pa_strat, po_strat]

        log_file.write('last_pa_ne_utility:' + str(pa_ne_utility) + '\n')
        log_file.write('last_po_ne_utility:' + str(po_ne_utility) + '\n')

        ### compute the best response poacher utility
        ### 1. train a best response poacher DQN against the current pa strategy
        calc_po_best_response(br_poacher,
                              br_target_poacher,
                              br_po_copy_ops,
                              br_po_good_copy_ops,
                              patrollers,
                              pa_strategy,
                              pa_type,
                              iteration,
                              sess,
                              env_pa,
                              args,
                              br_utility,
                              0,
                              train_episode_num=args.br_po_DQN_episode_num)
        br_DQN_utility = br_utility[1]

        ### 2. test against the poacher stored in poachers[0]
        ### (the heuristic poacher unless args.naive is set)
        br_heuristic_utility = 0.
        for i in range(pa_pointer):
            _, po_utility, _ = simulate_payoff(patrollers,
                                               poachers,
                                               i,
                                               0,
                                               env_pa,
                                               sess,
                                               args,
                                               pa_type=pa_type[i],
                                               po_type=po_type[0])
            br_heuristic_utility += po_utility * pa_strategy[i]

        ### choose the better one
        better = 'DQN' if br_DQN_utility >= br_heuristic_utility else 'heuristic'
        br_poacher_utility = max(br_DQN_utility, br_heuristic_utility)
        log_file.write(
            'Iteration {0} poacher best response utility {1} poacher best response type {2} \n'
            .format(iteration, br_poacher_utility, better))
        print(
            'Iteration {0} poacher best response utility {1} poacher best response type {2}'
            .format(iteration, br_poacher_utility, better))

        ### train the best response agent
        ### using threading to accelerate the training
        # Both worker functions receive this list as their 'utility' argument.
        final_utility = [0.0, 0.0]
        target_patroller = Patroller_CNN(args,
                                         'target_patroller' + str(iteration))
        good_patroller = Patroller_CNN(args, 'good_patroller' + str(iteration))
        pa_copy_ops = tf_copy(target_patroller, patrollers[pa_pointer], sess)
        pa_good_copy_ops = tf_copy(good_patroller, patrollers[pa_pointer],
                                   sess)
        pa_inverse_ops = tf_copy(patrollers[pa_pointer], good_patroller, sess)

        target_poacher = Poacher(args, 'target_poacher' + str(iteration))
        good_poacher = Poacher(args, 'good_poacher' + str(iteration))
        po_copy_ops = tf_copy(target_poacher, poachers[po_pointer], sess)
        po_good_copy_ops = tf_copy(good_poacher, poachers[po_pointer], sess)
        po_inverse_ops = tf_copy(poachers[po_pointer], good_poacher, sess)

        funcs = [calc_pa_best_response, calc_po_best_response]
        params = [
            [
                patrollers[pa_pointer], target_patroller, pa_copy_ops,
                pa_good_copy_ops, poachers, po_strategy, po_type, iteration,
                sess, env_pa, args, final_utility, 0
            ],
            [
                poachers[po_pointer], target_poacher, po_copy_ops,
                po_good_copy_ops, patrollers, pa_strategy, pa_type,
                iteration, sess, env_po, args, final_utility, 0
            ],
        ]

        ### if the maximum iteration number is achieved
        if args.iter_num == iteration:
            log_file.write(
                '\n DO reaches terminating iteration {0}'.format(iteration) +
                '\n')
            log_file.write('Final Pa-payoff: \n' + str(pa_payoff) + '\n')
            log_file.write('Final Po-payoff: \n' + str(po_payoff) + '\n')
            log_file.write('Final pa_strat:\n' + str(pa_strategy) + '\n')
            log_file.write('Final po_strat:\n' + str(po_strategy) + '\n')
            log_file.write('Final pa_ne_utility:' + str(pa_ne_utility) + '\n')
            log_file.write('Final po_ne_utility:' + str(po_ne_utility) + '\n')
            log_file.flush()

            # The summary above is flushed before the last best responses are
            # trained, so it is on disk even if that training is interrupted.
            _run_in_threads(funcs, params)

            pa_exploit = final_utility[0] - pa_ne_utility
            po_exploit = final_utility[1] - po_ne_utility
            log_file.write('Final pa_best_response_utility:' +
                           str(final_utility[0]) + '\n')
            log_file.write('Final po_best_response_utility:' +
                           str(final_utility[1]) + '\n')
            log_file.write('Final pa exploitibility:' + str(pa_exploit) + '\n')
            log_file.write('Final po exploitibility:' + str(po_exploit) + '\n')
            break

        ### not the final iteration
        _run_in_threads(funcs, params)

        sess.run(pa_inverse_ops)
        sess.run(po_inverse_ops)

        # Fill the new column (new poacher vs. every earlier patroller) ...
        for pa_strat in range(pa_pointer):
            (pa_payoff[pa_strat, po_pointer],
             po_payoff[pa_strat, po_pointer], _) = simulate_payoff(
                 patrollers, poachers, pa_strat, po_pointer, env_pa, sess,
                 args, pa_type=pa_type[pa_strat], po_type=po_type[po_pointer])

        # ... the new row (new patroller vs. every earlier poacher) ...
        for po_strat in range(po_pointer):
            (pa_payoff[pa_pointer, po_strat],
             po_payoff[pa_pointer, po_strat], _) = simulate_payoff(
                 patrollers, poachers, pa_pointer, po_strat, env_pa, sess,
                 args, pa_type=pa_type[pa_pointer], po_type=po_type[po_strat])

        # ... and the corner entry (new patroller vs. new poacher).
        (pa_payoff[pa_pointer, po_pointer],
         po_payoff[pa_pointer, po_pointer], _) = simulate_payoff(
             patrollers, poachers, pa_pointer, po_pointer, env_pa, sess, args,
             pa_type=pa_type[pa_pointer], po_type=po_type[po_pointer])

        pa_strategy, po_strategy = calc_NE_zero(pa_payoff, po_payoff,
                                                args.Delta)

        log_file.write('In DO pa_best_utility:' + str(final_utility[0]) + '\n')
        log_file.write('In DO po_best_utility:' + str(final_utility[1]) + '\n')

        ######### save models for this iteration #############
        save_name = args.save_path + 'iteration_' + str(
            iteration) + '_pa_model.ckpt'
        patrollers[pa_pointer].save(sess=sess, filename=save_name)
        save_name = args.save_path + 'iteration_' + str(
            iteration) + '_po_model.ckpt'
        poachers[po_pointer].save(sess=sess, filename=save_name)

        ### save payoff matrix and ne strategies
        np.save(file=args.save_path + 'pa_payoff_iter_' + str(iteration),
                arr=pa_payoff)
        np.save(file=args.save_path + 'po_payoff_iter_' + str(iteration),
                arr=po_payoff)
        np.save(file=args.save_path + 'pa_strategy_iter_' + str(iteration),
                arr=pa_strategy)
        np.save(file=args.save_path + 'po_strategy_iter_' + str(iteration),
                arr=po_strategy)

        log_file.write('pa_payoff:\n' + str(pa_payoff) + '\n')
        log_file.write('po_payoff:\n' + str(po_payoff) + '\n')
        log_file.write('pa_strategy:\n' + str(pa_strategy) + '\n')
        log_file.write('po_strategy:\n' + str(po_strategy) + '\n')

        iteration += 1
        pa_pointer += 1
        po_pointer += 1

        time_end = time.time()

        log_file.write('Using time: \n' + str(time_end - time_begin) + '\n')
        log_file.flush()


def main(wizard_args=None):
    """Parse the training configuration and run the DO training loop.

    Command-line arguments are parsed first, then the first enabled preset
    flag (if any) overwrites the sensor/map settings, then *wizard_args*
    overrides individual attributes. The resulting configuration is written
    to ``train_args.json`` and ``paralog.txt`` under ``args.save_path``, and
    training progress is written to ``log.txt`` via the module-level
    ``log_file``.

    Args:
        wizard_args: optional mapping of argument name to value; each entry
            is set on the parsed namespace after presets have been applied.

    Raises:
        ValueError: if ``--filter_bleeb`` is true while ``--po_bleeb`` is
            false on the command line.
    """
    global log_file

    argparser = argparse.ArgumentParser()
    ########################################################################################
    ### Presets
    for preset in _PRESETS:
        argparser.add_argument('--' + preset[0], type=_str2bool, default=False)

    ### Changes by us
    argparser.add_argument('--footsteps', type=_str2bool, default=False)
    argparser.add_argument('--po_bleeb', type=_str2bool, default=False)
    argparser.add_argument('--filter_bleeb', type=_str2bool, default=False)
    argparser.add_argument('--see_surrounding', type=_str2bool, default=False)

    argparser.add_argument('--tourist_noise', type=float, default=0.01)
    argparser.add_argument('--po_scan_rate', type=float, default=0.10)

    argparser.add_argument('--extra_sensor_pa', type=_str2bool, default=False)
    argparser.add_argument('--extra_sensor_po', type=_str2bool, default=False)

    ### Environment
    argparser.add_argument('--row_num', type=int, default=3)
    argparser.add_argument('--column_num', type=int, default=3)
    argparser.add_argument('--ani_den_seed', type=int, default=66)

    ### Patroller
    # -1 means "derive from the enabled sensors" (see below).
    argparser.add_argument('--pa_state_size', type=int, default=-1)
    argparser.add_argument('--pa_num_actions', type=int, default=5)

    ### Poacher CNN
    argparser.add_argument('--snare_num', type=int, default=3)
    # -1 means "derive from the enabled sensors" (see below).
    argparser.add_argument('--po_state_size', type=int, default=-1)
    argparser.add_argument('--po_num_actions', type=int, default=5)

    ### Poacher Rule Base, parameters set following advice from domain experts
    argparser.add_argument('--po_act_den_w', type=float, default=3.)
    argparser.add_argument('--po_act_enter_w', type=float, default=0.3)
    argparser.add_argument('--po_act_leave_w', type=float, default=-1.0)
    argparser.add_argument('--po_act_temp', type=float, default=5.0)
    argparser.add_argument('--po_home_dir_w', type=float, default=3.0)

    ### Training
    argparser.add_argument('--Delta',
                           type=float,
                           default=0.0,
                           help='the exploration rate in the meta-strategy')
    argparser.add_argument('--naive',
                           type=_str2bool,
                           default=False,
                           help='whehter using naive PSRO')
    argparser.add_argument(
        '--advanced_training',
        type=_str2bool,
        default=True,
        help='whether using dueling double DQN with graident clipping')
    argparser.add_argument('--map_type', type=str, default='random')
    argparser.add_argument(
        '--po_location',
        type=int,
        default=None,
        help='0, 1, 2, 3 for local modes; None for global mode')
    argparser.add_argument('--save_path',
                           type=str,
                           default='./Results_33_random/')

    argparser.add_argument('--pa_episode_num', type=int, default=300000)
    argparser.add_argument('--po_episode_num', type=int, default=300000)
    argparser.add_argument('--epi_num_incr', type=int,
                           default=0)  # no usage now
    argparser.add_argument('--final_incr_iter', type=int,
                           default=10)  # no usage now
    argparser.add_argument('--pa_replay_buffer_size', type=int, default=200000)
    argparser.add_argument('--po_replay_buffer_size', type=int, default=100000)
    argparser.add_argument('--test_episode_num', type=int, default=5000)
    argparser.add_argument('--iter_num',
                           type=int,
                           default=20,
                           help='DO iteraion num')
    argparser.add_argument('--load_path', type=str, default='./Results5x5/')
    argparser.add_argument('--load_num', type=int, default=0)
    argparser.add_argument('--pa_initial_lr', type=float, default=1e-4)
    argparser.add_argument('--po_initial_lr', type=float, default=5e-5)

    argparser.add_argument('--br_po_DQN_episode_num', type=int, default=500)
    argparser.add_argument('--print_every', type=int, default=50)
    argparser.add_argument('--zero_sum',
                           type=int,
                           default=1,
                           help='whether to set the game zero-sum')
    argparser.add_argument('--batch_size', type=int, default=32)
    argparser.add_argument('--target_update_every', type=int, default=2000)
    argparser.add_argument('--reward_gamma', type=float, default=0.95)
    argparser.add_argument('--save_every_episode', type=int, default=200)
    argparser.add_argument('--test_every_episode', type=int, default=10000)
    argparser.add_argument('--gui_every_episode', type=int, default=500)
    argparser.add_argument('--gui_test_num', type=int, default=20)
    argparser.add_argument('--gui', type=int, default=0)
    argparser.add_argument('--mix_every_episode', type=int, default=250)
    argparser.add_argument(
        '--epsilon_decrease',
        type=float,
        default=0.05,
        help='decrease of the epsilon exploration rate in DQN')
    argparser.add_argument('--PER',
                           type=_str2bool,
                           default=False,
                           help='wheter to use prioterized experience replay')
    argparser.add_argument('--reward_shaping',
                           type=_str2bool,
                           default=False,
                           help='whether to use reward shaping in training')

    argparser.add_argument('--max_time', type=int, default=100)
    #########################################################################################
    args = argparser.parse_args()

    # NOTE: this check only sees the command-line values; presets and
    # wizard_args are applied afterwards and are not re-validated.
    if args.filter_bleeb and not args.po_bleeb:
        raise ValueError(
            'filter_bleeb cannot be true, while po_bleeb is false')

    #### PRESETS ####
    _apply_preset(args)

    # Explicit overrides from the caller take precedence over presets.
    if wizard_args:
        for k, v in wizard_args.items():
            setattr(args, k, v)

    # Derive the state sizes from the enabled sensors unless given explicitly.
    # The boolean flags are used as 0/1 multipliers.
    if args.po_state_size == -1:
        args.po_state_size = 14 + (8 * args.footsteps) + (
            1 * args.see_surrounding) + (1 * args.extra_sensor_po)

    if args.pa_state_size == -1:
        args.pa_state_size = 12 + (8 * args.footsteps) + (
            1 * args.po_bleeb) + (1 * args.see_surrounding) + (
                1 * args.extra_sensor_pa)

    print("ARGS:", args)

    ### END PRESETS ####

    # Fixed hyper-parameters for the 10x10 grid.
    if args.row_num == 10:
        args.column_num = 10
        args.max_time = 75
        args.pa_initial_lr = 1e-4
        args.po_initial_lr = 5e-5
        args.pa_replay_buffer_size = 200000
        args.po_replay_buffer_size = 100000
        if args.po_location is not None:
            args.pa_episode_num = 2000
            args.po_episode_num = 2000

    if args.naive:
        args.Delta = 0.0
        args.po_location = None

    if args.save_path:
        # Guarantee a trailing separator: every output file below is built by
        # plain string concatenation onto save_path, so without it the files
        # would land next to the directory instead of inside it.
        args.save_path = os.path.join(args.save_path, '')
        if not os.path.exists(args.save_path):
            os.makedirs(args.save_path)

    with open(args.save_path + 'train_args.json', 'w') as f:
        f.write(json.dumps(vars(args)))

    _write_param_log(args)

    # log_file is a module-level global; _run_double_oracle writes to it.
    log_file = open(args.save_path + 'log.txt', 'w')
    try:
        _run_double_oracle(args)
    finally:
        # Close (and thereby flush) the log even if training raises.
        log_file.close()
### poachers used for training a separate best-response poacher DQN:
### the trained network, its target network and the "good" copy
br_poacher, br_target_poacher, br_good_poacher = (
    Poacher(args, model_name)
    for model_name in ('br_poacher', 'br_target_poacher', 'br_good_poacher')
)
br_utility = np.zeros(2)

# Session with the allow_growth GPU option enabled; variables are
# initialized right after the session is created.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
sess.run(tf.global_variables_initializer())

### copy ops for the best-response poacher DQN
br_po_copy_ops = tf_copy(br_target_poacher, br_poacher, sess)
br_po_good_copy_ops = tf_copy(br_good_poacher, br_poacher, sess)

# Slot 0 of each list holds the rule-based agents.
patrollers[0] = RandomSweepingPatroller(args, mode=0)
poachers[0] = Poacher_h(args, animal_density)

### load pretrained models
if args.load_path is not None and args.load_num > 0:
    # Three sub-directories named '1/', '2/', '3/' under args.load_path.
    for i in range(1, 4):
        ##### load_path needs to be checked
        load_path = '{0}{1}/'.format(args.load_path, i)
        # NOTE(review): `load_num` is used bare here while the guard above
        # reads `args.load_num`; it is not bound in the visible code --
        # confirm it is defined elsewhere.
        for j in range(1, load_num + 1):
            pa_ckpt = load_path + 'iteration_{0}_pa_model.ckpt'.format(j)
            po_ckpt = load_path + 'iteration_{0}_po_model.ckpt'.format(j)
            # Checkpoint j is first loaded into slot j ...
            patrollers[j].load(sess, pa_ckpt)
            poachers[j].load(sess, po_ckpt)
            # ... and then passed to tf_copy together with slot
            # i * load_num + j.
            pa_copy_op = tf_copy(patrollers[i * load_num + j],
                                 patrollers[j],
                                 sess)
            po_copy_op = tf_copy(poachers[i * load_num + j],
                                 poachers[j],
                                 sess)
예제 #4
0
def _str2bool(value):
    """Convert a command-line token to a real boolean.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so
    every non-empty value -- including ``'False'`` and ``'0'`` -- becomes
    True. This converter accepts the usual spellings instead.

    Raises:
        argparse.ArgumentTypeError: if the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ('true', 't', 'yes', 'y', '1', 'on'):
        return True
    # The empty string stays False, as it was under ``bool('')``.
    if token in ('false', 'f', 'no', 'n', '0', 'off', ''):
        return False
    raise argparse.ArgumentTypeError(
        'expected a boolean value, got {0!r}'.format(value))


def _make_preset(po_scan_rate, tourist_noise, filter_bleeb, see_surrounding,
                 poacher_map_7x7=False, naive=False):
    """Return the attribute overrides applied when a preset flag is set."""
    overrides = {
        'po_bleeb': True,
        'po_scan_rate': po_scan_rate,
        'tourist_noise': tourist_noise,
        'filter_bleeb': filter_bleeb,
        'see_surrounding': see_surrounding,
        'footsteps': False,
    }
    if poacher_map_7x7:
        overrides.update(column_num=7, row_num=7, map_type='poacher')
    if naive:
        overrides['naive'] = True
    return overrides


# Presets in priority order: when several flags are set, only the first one
# listed here is applied (this mirrors the original if/elif chain).
_PRESETS = (
    ('exac_loc_always_no_noise',
     _make_preset(po_scan_rate=1, tourist_noise=0, filter_bleeb=False,
                  see_surrounding=True)),
    ('exac_loc_always_with_noise',
     _make_preset(po_scan_rate=1, tourist_noise=0.05, filter_bleeb=False,
                  see_surrounding=True, poacher_map_7x7=True)),
    ('blur_loc_always_no_noise',
     _make_preset(po_scan_rate=1, tourist_noise=0, filter_bleeb=True,
                  see_surrounding=True, poacher_map_7x7=True)),
    # Fixed: this preset used filter_bleeb=False, which made it identical to
    # 'exac_loc_always_with_noise'; every other "blur" preset filters.
    ('blur_loc_always_with_noise',
     _make_preset(po_scan_rate=1, tourist_noise=0.05, filter_bleeb=True,
                  see_surrounding=True, poacher_map_7x7=True)),
    ('exac_loc_50_no_noise',
     _make_preset(po_scan_rate=0.5, tourist_noise=0, filter_bleeb=False,
                  see_surrounding=True)),
    # no files for this
    ('exac_loc_always_no_noise_no_vis',
     _make_preset(po_scan_rate=1, tourist_noise=0, filter_bleeb=False,
                  see_surrounding=False, poacher_map_7x7=True, naive=True)),
    # no files for this
    ('exac_loc_always_with_noise_no_vis',
     _make_preset(po_scan_rate=1, tourist_noise=0.05, filter_bleeb=False,
                  see_surrounding=False, poacher_map_7x7=True, naive=True)),
    ('blur_loc_always_no_noise_no_vis',
     _make_preset(po_scan_rate=1, tourist_noise=0, filter_bleeb=True,
                  see_surrounding=False, poacher_map_7x7=True, naive=True)),
    # Fixed: this preset used tourist_noise=0, which made it identical to
    # 'blur_loc_always_no_noise_no_vis'; the other "with_noise" presets
    # use 0.05.
    ('blur_loc_always_with_noise_no_vis',
     _make_preset(po_scan_rate=1, tourist_noise=0.05, filter_bleeb=True,
                  see_surrounding=False, poacher_map_7x7=True, naive=True)),
    # no files for this
    ('exac_loc_50_no_noise_no_vis',
     _make_preset(po_scan_rate=0.5, tourist_noise=0, filter_bleeb=False,
                  see_surrounding=False, poacher_map_7x7=True, naive=True)),
)


def _build_arg_parser():
    """Create the command-line parser with every option ``main`` understands."""
    parser = argparse.ArgumentParser()

    ### Presets
    for preset_name, _ in _PRESETS:
        parser.add_argument('--' + preset_name, type=_str2bool, default=False)

    ### Changes by us
    parser.add_argument('--footsteps', type=_str2bool, default=False)
    parser.add_argument('--po_bleeb', type=_str2bool, default=False)
    parser.add_argument('--filter_bleeb', type=_str2bool, default=False)
    parser.add_argument('--see_surrounding', type=_str2bool, default=False)

    parser.add_argument('--tourist_noise', type=float, default=0.01)
    parser.add_argument('--po_scan_rate', type=float, default=0.10)

    parser.add_argument('--extra_sensor_pa', type=_str2bool, default=False)
    parser.add_argument('--extra_sensor_po', type=_str2bool, default=False)

    ### Test parameters
    parser.add_argument('--pa_load_path', type=str, default='./Results5x5/')
    parser.add_argument('--po_load_path', type=str, default='./Results5x5/')
    parser.add_argument('--load', type=_str2bool, default=False)

    ### Environment
    parser.add_argument('--row_num', type=int, default=3)
    parser.add_argument('--column_num', type=int, default=3)
    parser.add_argument('--ani_den_seed', type=int, default=66)
    parser.add_argument('--max_time', type=int, default=50)

    ### Patroller
    parser.add_argument('--pa_state_size', type=int, default=-1)
    parser.add_argument('--pa_num_actions', type=int, default=5)

    ### Poacher CNN
    parser.add_argument('--snare_num', type=int, default=1)
    parser.add_argument('--po_state_size', type=int,
                        default=-1)  # yf: add self footprint to poacher
    parser.add_argument('--po_num_actions', type=int, default=5)

    ### Poacher Rule Base
    parser.add_argument('--po_act_den_w', type=float, default=3.)
    parser.add_argument('--po_act_enter_w', type=float, default=0.3)
    parser.add_argument('--po_act_leave_w', type=float, default=-1.0)
    parser.add_argument('--po_act_temp', type=float, default=5.0)
    parser.add_argument('--po_home_dir_w', type=float, default=3.0)

    ### Training
    parser.add_argument('--map_type', type=str, default='random')
    parser.add_argument('--advanced_training', type=_str2bool, default=True)
    parser.add_argument('--save_path',
                        type=str,
                        default='./Results33Parandom/')

    parser.add_argument('--naive', type=_str2bool, default=False)
    parser.add_argument('--pa_episode_num', type=int, default=300000)
    parser.add_argument('--po_episode_num', type=int, default=300000)
    parser.add_argument('--pa_initial_lr', type=float, default=1e-4)
    parser.add_argument('--po_initial_lr', type=float, default=5e-5)
    parser.add_argument('--epi_num_incr', type=int, default=0)
    parser.add_argument('--final_incr_iter', type=int, default=10)
    parser.add_argument('--pa_replay_buffer_size', type=int, default=200000)
    parser.add_argument('--po_replay_buffer_size', type=int, default=100000)
    parser.add_argument('--test_episode_num', type=int, default=20000)
    parser.add_argument('--iter_num', type=int, default=10)
    parser.add_argument('--po_location', type=int, default=None)
    parser.add_argument('--Delta', type=float, default=0.0)

    parser.add_argument('--print_every', type=int, default=50)
    parser.add_argument('--zero_sum', type=int, default=1)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--target_update_every', type=int, default=2000)
    parser.add_argument('--reward_gamma', type=float, default=0.95)
    parser.add_argument('--save_every_episode', type=int, default=5000)
    parser.add_argument('--test_every_episode', type=int, default=2000)
    parser.add_argument('--gui_every_episode', type=int, default=500)
    parser.add_argument('--gui_test_num', type=int, default=20)
    parser.add_argument('--gui', type=int, default=0)
    parser.add_argument('--mix_every_episode', type=int,
                        default=250)  # new added
    parser.add_argument('--epsilon_decrease', type=float,
                        default=0.05)  # new added
    parser.add_argument('--reward_shaping', type=_str2bool, default=False)
    parser.add_argument('--PER', type=_str2bool, default=False)

    return parser


def _apply_preset(args):
    """Overwrite ``args`` in place with the first preset whose flag is set."""
    for preset_name, overrides in _PRESETS:
        if getattr(args, preset_name):
            for attr_name, attr_value in overrides.items():
                setattr(args, attr_name, attr_value)
            return


def main(wizard_args=None):
    """Parse the configuration and launch the GUI test of a patroller/poacher pair.

    Args:
        wizard_args: optional mapping of option name -> value. Each entry is
            set on the parsed arguments after command-line parsing and before
            presets are resolved, so it can both override individual options
            and select a preset.

    Raises:
        ValueError: if the command line enables ``filter_bleeb`` without
            ``po_bleeb``.
    """
    args = _build_arg_parser().parse_args()

    # This check only covers the command line; wizard_args and presets are
    # applied afterwards (every preset sets po_bleeb=True itself).
    if not args.po_bleeb and args.filter_bleeb:
        raise ValueError(
            'filter_bleeb cannot be true, while po_bleeb is false')

    if wizard_args:
        for k, v in wizard_args.items():
            setattr(args, k, v)

    _apply_preset(args)

    # -1 means "derive the state size from the enabled features"; the boolean
    # flags contribute as 0/1 multipliers.
    if args.po_state_size == -1:
        args.po_state_size = 14 + (8 * args.footsteps) + (
            1 * args.see_surrounding) + (1 * args.extra_sensor_po)

    if args.pa_state_size == -1:
        args.pa_state_size = 12 + (8 * args.footsteps) + (
            1 * args.po_bleeb) + (1 * args.see_surrounding) + (
                1 * args.extra_sensor_pa)

    print("ARGS IN GUI:", args)
    ################## for initialization ###########################
    animal_density = generate_map(args)

    # NOTE(review): only index 1 is used below; five instances of each model
    # are still constructed, presumably so the variable names match the
    # training graph -- confirm before trimming.
    patrollers = [Patroller_CNN(args, 'pa_model' + str(i)) for i in range(5)]
    poachers = [Poacher(args, 'po_model' + str(i)) for i in range(5)]

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # Any --po_location given on the command line is deliberately discarded.
    args.po_location = None

    ### load the DQN models you have trained
    if args.load:
        poachers[1].load(sess, args.po_load_path)
        patrollers[1].load(sess, args.pa_load_path)

        test_gui(poachers[1],
                 patrollers[1],
                 sess,
                 args,
                 pa_type='DQN',
                 po_type='DQN')

    ### test the random sweeping patroller and the heuristic poacher
    else:
        poacher = Poacher_h(args, animal_density)
        patroller = RandomSweepingPatroller(args)
        test_gui(poacher, patroller, sess, args, pa_type='RS', po_type='PARAM')
예제 #5
0
    Patroller_CNN(args, 'pa_model' + str(i)) for i in range(args.iter_num + 1)
]
# One poacher DQN per iteration, plus slot 0 (iter_num + 1 models in total).
poachers = [
    Poacher(args, 'po_model' + str(i)) for i in range(args.iter_num + 1)
]
# Type tag of the agent stored in slot 0 of each population; starts as 'DQN'
# and is overwritten below when a non-DQN baseline is installed.
pa_type = ['DQN']
po_type = ['DQN']

### initialize poachers needed for training a separate best-response poacher DQN
br_poacher = Poacher(args, 'br_poacher')
br_target_poacher = Poacher(args, 'br_target_poacher')
br_good_poacher = Poacher(args, 'br_good_poacher')
br_utility = np.zeros(2)

# Unless --naive is set, slot 0 holds the random-sweeping patroller and the
# heuristic poacher instead of DQN models.
if not args.naive:
    patrollers[0] = RandomSweepingPatroller(args, mode=args.po_location)
    pa_type[0] = 'RS'
if not args.naive:
    poachers[0] = Poacher_h(args, animal_density)
    po_type[0] = 'PARAM'

# Let TensorFlow grow its GPU allocation on demand instead of reserving all
# GPU memory up front.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
# Initializes the variables of every model constructed so far.
sess.run(tf.global_variables_initializer())

### copy ops needed for training a separate best-response poacher DQN
# NOTE(review): presumably tf_copy(dst, src, sess) returns ops that copy the
# weights of `src` into `dst` -- confirm against tf_copy's definition.
br_po_copy_ops = tf_copy(br_target_poacher, br_poacher, sess)
br_po_good_copy_ops = tf_copy(br_good_poacher, br_poacher, sess)

# Patroller payoff matrix, starting as a single zero entry.
pa_payoff = np.zeros((1, 1))