Beispiel #1
0
        "blocks4/task03.pddl",
        "blocks4/task04.pddl",
        "blocks4/task05.pddl",
        "blocks4/task06.pddl",
        "blocks4/task07.pddl",
        "blocks4/task08.pddl",
        "blocks4/task09.pddl",
        "blocks4/task10.pddl",
        "blocks5/task01.pddl",
        "blocks5/task02.pddl",
        "blocks5/task03.pddl",
        "blocks5/task04.pddl",
        "blocks5/task05.pddl",
        "blocks5/task06.pddl",
        "blocks5/task07.pddl",
        "blocks5/task08.pddl",
        "blocks5/task09.pddl",
        "blocks5/task10.pddl",
    ],
)
# Sanity check: the configuration must expose exactly 30 problems.
assert len(_CONFIGURATION.problems) == 30

if __name__ == "__main__":
    # Script entry point: build the training arguments for this single
    # configuration first, then hand them to the trainer as domain "blocks".
    _training_args = get_training_args(
        configurations=[_CONFIGURATION],
        max_training_time=10 * 60,  # 10 minutes
    )
    train_wrapper(args=_training_args, domain_name="blocks")
Beispiel #2
0
from default_args import get_training_args, DomainAndProblemConfiguration
from train import train_wrapper

# Ferry training set: every combination of {2, 3, 4} locations and
# {1, 2, 3} cars, i.e. 3 x 3 = 9 problem files. The outer loop runs over
# locations so the files are listed l2-c1, l2-c2, l2-c3, l3-c1, ...
_CONFIGURATION = DomainAndProblemConfiguration(
    base_directory="../benchmarks/ferry",
    domain_pddl="ferry.pddl",
    problem_pddls=[
        "train/ferry-l{}-c{}.pddl".format(num_locations, num_cars)
        for num_locations in (2, 3, 4)
        for num_cars in (1, 2, 3)
    ],
)
# Sanity check: the configuration must expose exactly 9 problems.
assert len(_CONFIGURATION.problems) == 9

if __name__ == "__main__":
    # Script entry point: build the training arguments for the single ferry
    # configuration first, then hand them to the trainer as domain "ferry".
    _training_args = get_training_args(
        configurations=[_CONFIGURATION],
        max_training_time=3 * 60,  # 3 minutes
        num_folds=5,
    )
    train_wrapper(args=_training_args, domain_name="ferry")
def _build_arg_parser():
    """Build the command-line parser with its ``train`` and ``test`` sub-commands.

    Options shared by both modes are declared once on a help-less parent
    parser and inherited by each sub-command; the training-only options are
    added to the ``train`` sub-parser alone.

    Returns:
        argparse.ArgumentParser: the fully configured top-level parser. The
        chosen sub-command is stored under ``subparser_name``.
    """
    # Shared option group: parsed by whichever sub-command is selected.
    # ``add_help=False`` avoids a duplicate -h/--help when used as a parent.
    g_parser = argparse.ArgumentParser(add_help=False)
    g_parser.add_argument(
        '--num-episodes',
        type=int,
        help=
        'number of running episodes (default is 1000 for train, and 5 for test)'
    )
    # No ``default`` here: the option is required, so a default is never used.
    g_parser.add_argument(
        '--build',
        type=str,
        required=True,
        help='path of the unity build file, to run inside Unity - enter None')
    g_parser.add_argument('--weights-path',
                          type=str,
                          required=True,
                          help='path to weights dir')
    g_parser.add_argument('--agent',
                          choices=['ddpg', 'mddpg', 'maddpg'],
                          required=True,
                          help='type of agent')
    g_parser.add_argument('--num-agents',
                          choices=range(1, 9),
                          default=4,
                          type=int,
                          metavar='[1-8]',
                          help='number of agents (cars)')
    g_parser.add_argument('--num-obstacles',
                          choices=range(0, 17),
                          default=4,
                          type=int,
                          metavar='[0-16]',
                          help='number of random obstacles')

    parser = argparse.ArgumentParser(
        prog='RL_Multi_agent_Cars',
        description='please choose train or test to get specific help'
        ' (e.g main.py train -h)')
    subparsers = parser.add_subparsers(help='two available running modes',
                                       dest='subparser_name')
    # Sub-commands are optional by default on Python 3. Without this, running
    # with no sub-command yields a namespace lacking every shared option and
    # the caller crashes with AttributeError instead of a usage message.
    subparsers.required = True

    # ``test`` only needs the shared options.
    subparsers.add_parser('test', help='run test mode', parents=[g_parser])

    # ``train`` takes the shared options plus the training-only ones below.
    train_parser = subparsers.add_parser('train',
                                         help='run train mode',
                                         parents=[g_parser])
    train_parser.add_argument(
        '--save-mem',
        action='store_true',
        help='save the replay buffer during training for later use',
    )
    train_parser.add_argument('--scores-avg-window',
                              choices=range(0, 101),
                              metavar='[0-100]',
                              default=50,
                              type=int,
                              help='number of last scores to average')
    train_parser.add_argument(
        '--load-weights',
        action='store_true',
        help='add this to load weights from previous runs')
    train_parser.add_argument(
        '--load-mem',
        action='store_true',
        help='add this to load replay buffer from previous run')
    train_parser.add_argument(
        '--mem-path',
        type=str,
        help='path of replay buffer file to load or store')
    train_parser.add_argument('--solved-score',
                              default=40,
                              type=int,
                              help='score that complete the episode')
    train_parser.add_argument(
        '--show-graphics',
        action='store_true',
        help='add this to show graphics (slows down training)')
    train_parser.add_argument(
        '--print-agent-loss',
        action='store_true',
        help='print agent\'s loss after each episode (default=False)')
    train_parser.add_argument(
        '--save-best-weights',
        action='store_true',
        help=
        'save the best weights so far (by average score). saving directory will be the'
        ' same as weights-path with suffix \'best\' default=False')
    train_parser.add_argument(
        '--save-score-log',
        action='store_true',
        help=
        'saves a csv file with the ongoing scores of each episode (default=False)'
    )
    return parser


def main():
    """Parse the command line and run the selected mode (``train`` or ``test``).

    Builds the environment configuration (agent and obstacle counts) and the
    wrapper configuration (all parsed options, with ``agent`` replaced by the
    result of ``select_agent``), prints them, and dispatches to
    ``train_wrapper`` or ``test_wrapper``.

    Raises:
        SystemExit: raised by argparse on invalid arguments, including a
            missing sub-command, and on ``-h``.
    """
    args = _build_arg_parser().parse_args()

    # The episode default depends on the mode, so it is resolved after parsing.
    if args.num_episodes is None:
        args.num_episodes = 1000 if args.subparser_name == 'train' else 5

    env_config = {
        'num_agents': args.num_agents,
        'num_obstacles': args.num_obstacles,
        'setting': 0
    }
    # Copy the namespace: ``vars(args)`` is the namespace's own ``__dict__``,
    # so replacing 'agent' in it directly would silently mutate ``args`` too.
    wrapper_config = dict(vars(args))
    wrapper_config['agent'] = select_agent(wrapper_config['agent'])
    print('starting {} with arguments:\n{}'.format(args.subparser_name,
                                                   wrapper_config))
    if args.subparser_name == 'test':
        test_wrapper(env_config, wrapper_config)
    else:
        train_wrapper(env_config, wrapper_config)
Beispiel #4
0
        "train/zenotravel-cities3-planes3-people3-1826.pddl",
        "train/zenotravel-cities3-planes3-people5-4582.pddl",
    ],
)
# Sanity check: the zenotravel configuration must expose exactly 10 problems.
assert len(_ZENOTRAVEL_CONFIGURATION.problems) == 10

# Gripper: the first three problems, gripper-n1 through gripper-n3.
_GRIPPER_CONFIGURATION = DomainAndProblemConfiguration(
    base_directory="../benchmarks/gripper",
    domain_pddl="domain.pddl",
    problem_pddls=[
        "problems/gripper-n{}.pddl".format(index) for index in range(1, 4)
    ],
)
# Sanity check: the gripper configuration must expose exactly 3 problems.
assert len(_GRIPPER_CONFIGURATION.problems) == 3

if __name__ == "__main__":
    # Script entry point: train on the three configurations together, in
    # this order, and hand the arguments to the trainer as domain "multi".
    _all_configurations = [
        _BLOCKSWORLD_CONFIGURATION,
        _ZENOTRAVEL_CONFIGURATION,
        _GRIPPER_CONFIGURATION,
    ]
    _training_args = get_training_args(
        configurations=_all_configurations,
        max_training_time=15 * 60,  # 15 minutes
    )
    train_wrapper(args=_training_args, domain_name="multi")
        "blocks5/task03.pddl",
        "blocks5/task04.pddl",
        "blocks5/task05.pddl",
    ],
)
# Sanity check: the blocksworld configuration must expose exactly 10 problems.
assert len(_BLOCKSWORLD_CONFIGURATION.problems) == 10

# Gripper: the first three problems, gripper-n1 through gripper-n3.
_GRIPPER_CONFIGURATION = DomainAndProblemConfiguration(
    base_directory="../benchmarks/gripper",
    domain_pddl="domain.pddl",
    problem_pddls=[
        "problems/gripper-n{}.pddl".format(index) for index in range(1, 4)
    ],
)
# Sanity check: the gripper configuration must expose exactly 3 problems.
assert len(_GRIPPER_CONFIGURATION.problems) == 3

if __name__ == "__main__":
    # Script entry point: train on blocksworld and gripper together, in this
    # order, and hand the arguments to the trainer as domain "indepbg".
    _all_configurations = [
        _BLOCKSWORLD_CONFIGURATION,
        _GRIPPER_CONFIGURATION,
    ]
    _training_args = get_training_args(
        configurations=_all_configurations,
        max_training_time=10 * 60,  # 10 minutes
    )
    train_wrapper(args=_training_args, domain_name="indepbg")
Beispiel #6
0
# Zenotravel training set: 5 instances with 2 cities followed by 5 instances
# with 3 cities, 10 problems in total. Every file lives under "train/" and
# shares the "zenotravel-" prefix and ".pddl" suffix, so only the
# distinguishing part of each name is listed.
_ZENOTRAVEL_CONFIGURATION = DomainAndProblemConfiguration(
    base_directory="../benchmarks/zenotravel",
    domain_pddl="domain.pddl",
    problem_pddls=[
        "train/zenotravel-{}.pddl".format(instance)
        for instance in (
            "cities2-planes1-people3-8798",
            "cities2-planes2-people3-9145",
            "cities2-planes3-people3-3417",
            "cities2-planes4-people2-4892",
            "cities2-planes4-people4-6874",
            "cities3-planes1-people3-4791",
            "cities3-planes2-people3-8752",
            "cities3-planes2-people5-7306",
            "cities3-planes3-people3-1826",
            "cities3-planes3-people5-4582",
        )
    ],
)
# Sanity check: the configuration must expose exactly 10 problems.
assert len(_ZENOTRAVEL_CONFIGURATION.problems) == 10

if __name__ == "__main__":
    # Script entry point: train on gripper and zenotravel together, in this
    # order, and hand the arguments to the trainer as domain "indepgz".
    _all_configurations = [
        _GRIPPER_CONFIGURATION,
        _ZENOTRAVEL_CONFIGURATION,
    ]
    _training_args = get_training_args(
        configurations=_all_configurations,
        max_training_time=10 * 60,  # 10 minutes
    )
    train_wrapper(args=_training_args, domain_name="indepgz")
Beispiel #7
0
from default_args import get_training_args, DomainAndProblemConfiguration
from train import train_wrapper

# Gripper training set: one problem each for 1, 2 and 3 balls
# (gripper-n1 through gripper-n3), 3 problems in total.
_CONFIGURATION = DomainAndProblemConfiguration(
    base_directory="../benchmarks/gripper",
    domain_pddl="domain.pddl",
    problem_pddls=[
        "problems/gripper-n{}.pddl".format(num_balls)
        for num_balls in range(1, 4)
    ],
)
# Sanity check: the configuration must expose exactly 3 problems.
assert len(_CONFIGURATION.problems) == 3

if __name__ == "__main__":
    # Script entry point: build the training arguments for the single gripper
    # configuration first, then hand them to the trainer.
    _training_args = get_training_args(
        configurations=[_CONFIGURATION],
        max_training_time=90,  # 90 seconds
        num_bins=3,
    )
    train_wrapper(args=_training_args)