Esempio n. 1
0
def _parse_board_size(text):
    """Convert a board-size string such as ``'(20,60)'`` into a list of ints.

    Surrounding whitespace and round/square brackets are optional, so
    ``'20,60'``, ``'(20, 60)'`` and ``'[20,60]'`` all yield ``[20, 60]``.

    Raises:
        argparse.ArgumentTypeError: if any comma-separated field is not an
            integer, so argparse reports it as a normal usage error.
    """
    # Strip only real brackets instead of blindly dropping the first and last
    # characters, which silently turned '20,60' into [0, 6].
    inner = text.strip().strip('()[]')
    try:
        return [int(field) for field in inner.split(',')]
    except ValueError:
        raise argparse.ArgumentTypeError(
            "board size must look like '(height,width)', got %r" % text)


def parse_args(argv=None):
    """Parse the game's command-line options and fill in derived defaults.

    Args:
        argv: optional list of argument strings. When None (the default),
            argparse reads the arguments from ``sys.argv[1:]``.

    Returns:
        The argparse namespace, post-processed as follows:
          * ``record_to`` is None when empty or not given.
          * ``log_file`` / ``output_file`` default to ``game.log`` /
            ``game.out`` in this file's directory.
          * when ``playback_from`` is given, ``record_to``, ``output_file``
            and ``log_file`` are all forced to None.
          * ``board_size`` is a list of ints parsed from the given string.
          * ``policies`` is a list with one ``base_policy.build(...)`` result
            per ';'-separated policy description (empty if none was given).
    """
    parser = argparse.ArgumentParser()

    group = parser.add_argument_group('I/O')
    group.add_argument('--record_to',
                       '-rt',
                       type=str,
                       default=None,
                       help="file path to which game will be recorded.")
    group.add_argument(
        '--playback_from',
        '-p',
        type=str,
        default=None,
        help=
        'file path from which game will be played-back (overrides record_to)')
    group.add_argument('--playback_initial_round',
                       '-pir',
                       type=int,
                       default=0,
                       help='round in which to start the playback')
    group.add_argument('--playback_final_round',
                       '-pfr',
                       type=int,
                       default=1000,
                       help='round in which to end the playback')
    group.add_argument(
        '--log_file',
        '-l',
        type=str,
        default=None,
        help="a path to which game events are logged. default: game.log")
    group.add_argument(
        '--output_file',
        '-o',
        type=str,
        default=None,
        help=
        "a path to a file in which game results are written. default: game.out"
    )
    # NOTE(review): the help text says "should not be rendered" while the flag
    # is named to_render -- one of the two looks inverted. Left unchanged
    # because the consumer of this flag is not visible here; please confirm.
    group.add_argument('--to_render',
                       '-r',
                       type=int,
                       default=0,
                       help="whether game should not be rendered")
    group.add_argument(
        '--render_rate',
        '-rr',
        type=float,
        default=0.1,
        help=
        'frames per second, note that the policy_wait_time bounds on the rate')

    group = parser.add_argument_group('Game')
    # argparse also runs string defaults through ``type``, so the default
    # below ends up as [20, 60] just like a user-supplied value would.
    group.add_argument('--board_size',
                       '-bs',
                       type=_parse_board_size,
                       default='(20,60)',
                       help='a tuple of (height, width)')
    group.add_argument('--obstacle_density',
                       '-od',
                       type=float,
                       default=.04,
                       help='the density of obstacles on the board')
    group.add_argument(
        '--policy_wait_time',
        '-pwt',
        type=float,
        default=0.01,
        help='seconds to wait for policies to respond with actions')
    group.add_argument(
        '--random_food_prob',
        '-fp',
        type=float,
        default=.2,
        help='probability of a random food appearing in a round')
    group.add_argument(
        '--max_item_density',
        '-mid',
        type=float,
        default=.25,
        help='maximum item density in the board (not including the players)')
    group.add_argument(
        '--food_ratio',
        '-fr',
        type=float,
        default=.2,
        help=
        'the ratio between a corpse and the number of food items it produces')
    group.add_argument('--game_duration',
                       '-D',
                       type=int,
                       default=10000,
                       help='number of rounds in the session')
    group.add_argument(
        '--policy_action_time',
        '-pat',
        type=float,
        default=0.01,
        help='seconds to wait for agents to respond with actions')
    group.add_argument('--policy_learn_time',
                       '-plt',
                       type=float,
                       default=0.1,
                       help='seconds to wait for agents to improve policy')
    group.add_argument(
        '--player_init_time',
        '-pit',
        type=float,
        default=PLAYER_INIT_TIME,
        help=
        'seconds to wait for agents to initialize in the beginning of the session'
    )

    group = parser.add_argument_group('Players')
    group.add_argument(
        '--score_scope',
        '-s',
        type=int,
        default=1000,
        help=
        'The score is the average reward during the last score_scope rounds of the session'
    )
    group.add_argument('--init_player_size',
                       '-is',
                       type=int,
                       default=5,
                       help='player length at start, minimum is 3')
    group.add_argument(
        '--policies',
        '-P',
        type=str,
        default=None,
        help=
        'a string describing the policies to be used in the game, of the form: '
        '<policy_name>(<arg=val>,*);+.\n'
        'e.g. MyPolicy(layer1=100,layer2=20);YourPolicy(your_params=123)')

    args = parser.parse_args(argv)

    # Default output locations live next to this source file.
    code_path = os.path.split(os.path.abspath(__file__))[0] + os.path.sep
    if not args.record_to:
        # Normalise an empty string to None.
        args.record_to = None
    if args.log_file is None:
        args.log_file = code_path + 'game.log'
    if args.output_file is None:
        args.output_file = code_path + 'game.out'
    if args.playback_from is not None:
        # Playback mode overrides recording and disables both output files.
        args.record_to = None
        args.output_file = None
        args.log_file = None

    # One policy description per ';'-separated chunk.
    policy_specs = []
    if args.policies is not None:
        policy_specs.extend(args.policies.split(';'))
    args.policies = [base_policy.build(spec) for spec in policy_specs]

    return args
Esempio n. 2
0
def _str_to_bool(text):
    """Interpret a command-line string as a boolean.

    ``type=bool`` cannot be used with argparse for this: ``bool('False')`` is
    True because every non-empty string is truthy.

    Raises:
        argparse.ArgumentTypeError: if the text is not a recognised spelling
            of true/false.
    """
    lowered = text.strip().lower()
    if lowered in ('true', 't', 'yes', 'y', '1'):
        return True
    if lowered in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(
        'expected a boolean value (true/false), got %r' % text)


def parse_args(argv=None):
    """Parse the session's command-line options and fill in derived defaults.

    Args:
        argv: optional list of argument strings. When None (the default),
            argparse reads the arguments from ``sys.argv[1:]``.

    Returns:
        The argparse namespace, post-processed as follows:
          * ``log_file`` / ``output_file`` default to ``game.log`` /
            ``game.out`` in this file's directory.
          * ``model_folder`` / ``record_folder`` default to ``models`` /
            ``recordings`` sub-folders of this file's directory; both folders
            are created if they do not exist.
          * when ``playback_from`` is given, ``record_to``, ``output_file``,
            ``log_file`` and ``model_folder`` are all forced to None.
          * ``agents`` is a list with one ``base_policy.build(...)`` result
            per ';'-separated agent description.
          * ``board_initializer`` is the object obtained by calling whatever
            ``base_initializer.find_initializer`` returns for the given name.

    Side effects:
        May create the model and recording folders on disk. Exits through
        ``parser.error`` (usage message, status 2) when not in playback mode
        and the number of agents is not exactly two.
    """
    parser = argparse.ArgumentParser()

    group = parser.add_argument_group('I/O')
    group.add_argument('--record_to',
                       '-rt',
                       type=str,
                       default=None,
                       help="file name to which session will be recorded.")
    group.add_argument(
        '--playback_from',
        '-p',
        type=str,
        default=None,
        help=
        'file name from which game will be played-back (overrides record_to)')
    group.add_argument(
        '--log_file',
        '-l',
        type=str,
        default=None,
        help="a path to which game events are logged. default: game.log")
    group.add_argument(
        '--output_file',
        '-o',
        type=str,
        default=None,
        help=
        "a path to a file in which game results and model paths are written. default: game.out"
    )
    # Backslashes are escaped explicitly (the bare "\s" and "\ " were invalid
    # escape sequences), and the documented default now names the 'models'
    # folder that the code below actually uses.
    group.add_argument(
        '--model_folder',
        '-mf',
        type=str,
        default=None,
        help=
        "a folder to which agents may record their model'. default: this file's folder, .\\models\\ "
    )
    group.add_argument(
        '--record_folder',
        '-rf',
        type=str,
        default=None,
        help=
        "a folder to which games are recorded'. default: this file's folder, .\\recordings\\ "
    )

    # Create the group on the parser and keep the handle: the previous code
    # nested it under the I/O group and discarded it, so these options were
    # listed under "I/O" (and nesting argument groups is deprecated).
    group = parser.add_argument_group('Playback')
    group.add_argument('--initial_playback_round',
                       '-ipr',
                       type=int,
                       default=None,
                       help='which round to start the playback from')
    group.add_argument('--final_playback_round',
                       '-fpr',
                       type=int,
                       default=None,
                       help='which round to end the playback at')
    group.add_argument(
        '--playback_round_skip',
        '-prs',
        type=int,
        default=None,
        help='how many rounds to skip over between game playbacks')
    group.add_argument(
        '--render_rate',
        '-rr',
        type=float,
        default=1.0,
        help='seconds to wait between actions when a game is being played back'
    )

    group = parser.add_argument_group('Game')
    group.add_argument(
        '--policy_action_time',
        '-pat',
        type=float,
        default=0.01,
        help='seconds to wait for agents to respond with actions')
    group.add_argument('--policy_learn_time',
                       '-plt',
                       type=float,
                       default=0.1,
                       help='seconds to wait for agents to improve policy')
    group.add_argument('--game_duration',
                       '-D',
                       type=int,
                       default=100,
                       help='number of rounds')
    group.add_argument(
        '--test_or_train',
        '-t',
        type=str,
        default='train',
        help='whether this is a training session or a test session')
    group.add_argument('--board_initializer',
                       '-bi',
                       type=str,
                       default='RandomBoard',
                       help='function to be used for board initialization')
    group.add_argument('--selfplay',
                       '-sp',
                       type=_str_to_bool,
                       default=False,
                       help='whether or not this is a selfplay session')
    # NOTE(review): this help text duplicates --selfplay's. Left unchanged
    # because the meaning of this value is not visible here; please confirm
    # and reword.
    group.add_argument('--selfplay_time',
                       '-spt',
                       type=int,
                       default=10000,
                       help='whether or not this is a selfplay session')

    group = parser.add_argument_group('Agents')
    group.add_argument(
        '--agents',
        '-A',
        type=str,
        default=None,
        help=
        'a string describing the two agents to be used in the game, of the form: '
        '<agent_name>(<arg=val>,*);<agent_name>(<arg=val>,*)\n'
        'e.g. MyAgent(layer1=100,layer2=20);YourAgent(your_params=123)')

    args = parser.parse_args(argv)

    # Default I/O locations live next to this source file.
    code_path = os.path.split(os.path.abspath(__file__))[0] + os.path.sep
    if args.log_file is None:
        args.log_file = code_path + 'game.log'
    if args.model_folder is None:
        args.model_folder = code_path + 'models' + os.path.sep
    if not os.path.exists(args.model_folder):
        # makedirs also creates missing parent folders of a user-given path.
        os.makedirs(args.model_folder)
    if args.record_folder is None:
        args.record_folder = code_path + 'recordings' + os.path.sep
    if not os.path.exists(args.record_folder):
        os.makedirs(args.record_folder)
    if args.output_file is None:
        args.output_file = code_path + 'game.out'
    if args.playback_from is not None:
        # Playback mode overrides recording and disables every output target.
        args.record_to = None
        args.output_file = None
        args.log_file = None
        args.model_folder = None

    # One agent description per ';'-separated chunk.
    agent_specs = []
    if args.agents is not None:
        agent_specs.extend(args.agents.split(';'))
    if args.playback_from is None and len(agent_specs) != 2:
        # Explicit check rather than ``assert`` so it still runs under -O.
        parser.error("Wrong Number of Players!")

    args.agents = [base_policy.build(spec) for spec in agent_specs]

    # Look up the initializer by name, then instantiate it.
    initializer_factory = base_initializer.find_initializer(
        args.board_initializer)
    args.board_initializer = initializer_factory()

    return args