Example #1
    def init_trainer(self, args):
        if args.gpuid:
            print('Running with GPU {}.'.format(args.gpuid[0]))
            cuda.set_device(args.gpuid[0])
        else:
            print('Running with CPU.')

        if args.random_seed:
            random.seed(args.random_seed + os.getpid())
            np.random.seed(args.random_seed + os.getpid())

        schema = Schema(args.schema_path)
        scenario_db = ScenarioDB.from_dict(schema,
                                           read_json(args.scenarios_path),
                                           Scenario)
        valid_scenario_db = ScenarioDB.from_dict(
            schema, read_json(args.valid_scenarios_path), Scenario)

        # if len(args.agent_checkpoints) == 0
        # assert len(args.agent_checkpoints) <= len(args.agents)
        if len(args.agent_checkpoints) < len(args.agents):
            ckpt = [None] * 2
        else:
            ckpt = args.agent_checkpoints

        systems = [
            get_system(name, args, schema, False, ckpt[i])
            for i, name in enumerate(args.agents)
        ]

        rl_agent = 0
        system = systems[rl_agent]
        model = system.env.model
        loss = None
        # optim = build_optim(args, [model, system.env.critic], None)
        optim = {
            'model': build_optim(args, model, None),
            'critic': build_optim(args, system.env.critic, None)
        }
        optim['critic']._set_rate(0.05)

        scenarios = {
            'train': scenario_db.scenarios_list,
            'dev': valid_scenario_db.scenarios_list
        }
        from neural.a2c_trainer import RLTrainer as A2CTrainer
        trainer = A2CTrainer(systems,
                             scenarios,
                             loss,
                             optim,
                             rl_agent,
                             reward_func=args.reward,
                             cuda=(len(args.gpuid) > 0),
                             args=args)

        self.args = args
        self.trainer = trainer
        self.systems = systems
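
The optim dict above keeps separate optimizers for the seq2seq model and the critic so the A2C trainer can step them independently, with _set_rate(0.05) pinning the critic's learning rate. Below is a minimal stand-alone sketch of that layout in plain PyTorch; the toy modules and hand-picked learning rates are stand-ins for system.env.model, system.env.critic, and the project's build_optim, not the actual implementation.

import torch

# Toy stand-ins for system.env.model and system.env.critic; the real project
# builds its optimizers through build_optim, so this only sketches the layout.
policy = torch.nn.Linear(8, 4)
critic = torch.nn.Linear(8, 1)

optim = {
    'model': torch.optim.Adam(policy.parameters(), lr=1e-3),
    'critic': torch.optim.SGD(critic.parameters(), lr=0.05),  # cf. _set_rate(0.05)
}

# The critic can be stepped on its own schedule, independently of the model.
value_loss = critic(torch.randn(2, 8)).pow(2).mean()
value_loss.backward()
optim['critic'].step()
optim['critic'].zero_grad()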
Example #2
    schema = Schema(args.schema_path)
    scenario_db = ScenarioDB.from_dict(schema, read_json(args.scenarios_path),
                                       Scenario)
    valid_scenario_db = ScenarioDB.from_dict(
        schema, read_json(args.valid_scenarios_path), Scenario)

    assert len(args.agent_checkpoints) <= len(args.agents)
    systems = [
        get_system(name, args, schema, False, args.agent_checkpoints[i])
        for i, name in enumerate(args.agents)
    ]

    rl_agent = 0
    system = systems[rl_agent]
    model = system.env.model
    loss = make_loss(args, model, system.mappings['tgt_vocab'])
    optim = build_optim(args, model, None)

    scenarios = {
        'train': scenario_db.scenarios_list,
        'dev': valid_scenario_db.scenarios_list
    }
    trainer = RLTrainer(systems,
                        scenarios,
                        loss,
                        optim,
                        rl_agent,
                        reward_func=args.reward,
                        reward_beta=args.reward_beta)
    trainer.learn(args)
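
Unlike Example #1, which falls back to [None] * 2 when fewer checkpoints than agents are supplied, this variant asserts that the two lists line up. A small helper capturing Example #1's fallback, written against plain lists rather than the parsed args namespace:

def pad_checkpoints(agents, agent_checkpoints):
    """Return one checkpoint path (or None) per agent.

    Mirrors the fallback in Example #1: when fewer checkpoints than agents
    are supplied, every agent starts without a checkpoint.
    """
    if len(agent_checkpoints) < len(agents):
        return [None] * len(agents)
    return agent_checkpoints

# Toy usage:
#   pad_checkpoints(['a', 'b'], [])               -> [None, None]
#   pad_checkpoints(['a', 'b'], ['a.pt', 'b.pt']) -> ['a.pt', 'b.pt']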
Example #3
    parser.add_argument('--critic-path', default=None, help='Output path for the critic Model')

    cocoa.options.add_scenario_arguments(parser)
    options.add_system_arguments(parser)
    options.add_rl_arguments(parser)
    options.add_model_arguments(parser)
    args = parser.parse_args()

    if args.random_seed:
        random.seed(args.random_seed)
        np.random.seed(args.random_seed)

    schema = Schema(args.schema_path)
    scenario_db = ScenarioDB.from_dict(schema, read_json(args.scenarios_path), Scenario)
    valid_scenario_db = ScenarioDB.from_dict(schema, read_json(args.valid_scenarios_path), Scenario)

    assert len(args.agent_checkpoints) <= len(args.agents)
    systems = [get_system(name, args, schema, False, args.agent_checkpoints[i]) for i, name in enumerate(args.agents)]

    rl_agent = 0
    system = systems[rl_agent]
    model = system.env.model
    critic_model = system.env.critic_model
    loss = make_loss(args, model, system.mappings['tgt_vocab'])
    optim = build_optim(args, model, None)
    critic_optim = build_optim(args, critic_model, None)

    scenarios = {'train': scenario_db.scenarios_list, 'dev': valid_scenario_db.scenarios_list}
    trainer = CriticTrainer(systems, scenarios, loss, critic_optim, rl_agent, reward_func=args.reward, cuda=(len(args.gpuid) > 0))
    trainer.learn(args)
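
One difference worth noting against Example #1: there the seeds are offset by os.getpid() so concurrently launched processes draw different random streams, while here random and numpy are seeded with args.random_seed directly. A minimal stand-alone illustration of both variants, independent of the project code:

import os
import random

import numpy as np

def seed_everything(base_seed, per_process=False):
    # Offsetting by the PID (as in Example #1) keeps concurrently launched
    # training processes from replaying identical random sequences.
    seed = base_seed + os.getpid() if per_process else base_seed
    random.seed(seed)
    np.random.seed(seed)
    return seed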
Example #4
    if len(args.agent_checkpoints) < len(args.agents):
        ckpt = [None] * 2
    else:
        ckpt = args.agent_checkpoints

    systems = [
        get_system(name, args, schema, False, ckpt[i])
        for i, name in enumerate(args.agents)
    ]

    rl_agent = 0
    system = systems[rl_agent]
    model = system.env.model
    loss = None
    # optim = build_optim(args, [model, system.env.critic], None)
    optim = {
        'model': build_optim(args, model, None),
        'critic': build_optim(args, system.env.critic, None)
    }
    optim['critic']._set_rate(0.05)

    scenarios = {
        'train': scenario_db.scenarios_list,
        'dev': valid_scenario_db.scenarios_list
    }
    from neural.a2c_trainer import RLTrainer as A2CTrainer
    trainer = A2CTrainer(systems,
                         scenarios,
                         loss,
                         optim,
                         rl_agent,
                         reward_func=args.reward,
                         cuda=(len(args.gpuid) > 0),
                         args=args)