def init_trainer(self, args):
    if args.gpuid:
        print('Running with GPU {}.'.format(args.gpuid[0]))
        cuda.set_device(args.gpuid[0])
    else:
        print('Running with CPU.')

    if args.random_seed:
        random.seed(args.random_seed + os.getpid())
        np.random.seed(args.random_seed + os.getpid())

    # Load the schema and the train/dev scenario pools.
    schema = Schema(args.schema_path)
    scenario_db = ScenarioDB.from_dict(schema, read_json(args.scenarios_path), Scenario)
    valid_scenario_db = ScenarioDB.from_dict(
        schema, read_json(args.valid_scenarios_path), Scenario)

    # assert len(args.agent_checkpoints) <= len(args.agents)
    # Fall back to fresh (uninitialized) agents when no checkpoints are given.
    if len(args.agent_checkpoints) < len(args.agents):
        ckpt = [None] * 2
    else:
        ckpt = args.agent_checkpoints

    systems = [
        get_system(name, args, schema, False, ckpt[i])
        for i, name in enumerate(args.agents)
    ]

    # Agent 0 is the one trained with RL; the other agent stays fixed.
    rl_agent = 0
    system = systems[rl_agent]
    model = system.env.model
    loss = None

    # Separate optimizers for the policy (model) and the value function (critic).
    # optim = build_optim(args, [model, system.env.critic], None)
    optim = {
        'model': build_optim(args, model, None),
        'critic': build_optim(args, system.env.critic, None)
    }
    optim['critic']._set_rate(0.05)

    scenarios = {
        'train': scenario_db.scenarios_list,
        'dev': valid_scenario_db.scenarios_list
    }

    from neural.a2c_trainer import RLTrainer as A2CTrainer
    trainer = A2CTrainer(systems, scenarios, loss, optim, rl_agent,
                         reward_func=args.reward,
                         cuda=(len(args.gpuid) > 0),
                         args=args)

    self.args = args
    self.trainer = trainer
    self.systems = systems
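A2CTrainer's update logic lives in neural/a2c_trainer.py and is not shown here. As a rough illustration of why the policy ('model') and the value network ('critic') get separate optimizer entries, the sketch below performs one generic actor-critic update over a finished episode. It is a minimal sketch under assumed interfaces: a2c_update, its arguments, and the plain torch.optim optimizers are illustrative and do not reflect the repo's Optim wrapper or trainer code.

import torch
import torch.nn.functional as F

def a2c_update(log_probs, values, rewards, model_optim, critic_optim, gamma=0.95):
    # log_probs: log pi(a_t | s_t) for the actions the RL agent took (tensors with grad)
    # values:    critic estimates V(s_t), one per turn (tensors with grad)
    # rewards:   per-turn rewards, typically zero until the final turn

    # Discounted returns, computed backwards from the end of the episode.
    returns, R = [], 0.0
    for r in reversed(rewards):
        R = r + gamma * R
        returns.insert(0, R)
    returns = torch.tensor(returns, dtype=torch.float)

    log_probs = torch.stack(log_probs)
    values = torch.stack(values).squeeze(-1)

    # Advantage uses the critic as a baseline; detach so the policy loss
    # does not push gradients into the critic.
    advantages = returns - values.detach()

    policy_loss = -(log_probs * advantages).mean()   # actor term
    critic_loss = F.mse_loss(values, returns)        # critic regression term

    # Each parameter set is stepped by its own optimizer, mirroring the
    # separate 'model' and 'critic' entries in the optim dict above.
    model_optim.zero_grad()
    critic_optim.zero_grad()
    (policy_loss + critic_loss).backward()
    model_optim.step()
    critic_optim.step()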
schema = Schema(args.schema_path)
scenario_db = ScenarioDB.from_dict(schema, read_json(args.scenarios_path), Scenario)
valid_scenario_db = ScenarioDB.from_dict(
    schema, read_json(args.valid_scenarios_path), Scenario)

assert len(args.agent_checkpoints) <= len(args.agents)
systems = [
    get_system(name, args, schema, False, args.agent_checkpoints[i])
    for i, name in enumerate(args.agents)
]

rl_agent = 0
system = systems[rl_agent]
model = system.env.model
loss = make_loss(args, model, system.mappings['tgt_vocab'])
optim = build_optim(args, model, None)

scenarios = {
    'train': scenario_db.scenarios_list,
    'dev': valid_scenario_db.scenarios_list
}

trainer = RLTrainer(systems, scenarios, loss, optim, rl_agent,
                    reward_func=args.reward,
                    reward_beta=args.reward_beta)
trainer.learn(args)
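trainer.learn(args) runs self-play episodes and applies policy-gradient updates using the reward named by args.reward. The snippet below is only a generic sketch of a REINFORCE-style step with a smoothed reward baseline; treating reward_beta as the baseline's moving-average coefficient is an assumption made for illustration, and ReinforceUpdater and its methods are invented names, not RLTrainer's interface.

import torch

class ReinforceUpdater(object):
    """Hypothetical REINFORCE step: scale the episode log-likelihood
    gradient by (reward - running baseline)."""

    def __init__(self, optimizer, beta=0.5):
        self.optimizer = optimizer   # a plain torch.optim optimizer (assumed)
        self.beta = beta             # assumed role: baseline smoothing factor
        self.baseline = 0.0

    def update(self, log_prob_sum, reward):
        # Exponential moving average of past rewards serves as the baseline.
        self.baseline = self.beta * self.baseline + (1.0 - self.beta) * reward
        advantage = reward - self.baseline

        loss = -advantage * log_prob_sum    # policy-gradient surrogate loss
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss.item()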
parser.add_argument('--critic-path', default=None,
                    help='Output path for the critic model')
cocoa.options.add_scenario_arguments(parser)
options.add_system_arguments(parser)
options.add_rl_arguments(parser)
options.add_model_arguments(parser)
args = parser.parse_args()

if args.random_seed:
    random.seed(args.random_seed)
    np.random.seed(args.random_seed)

schema = Schema(args.schema_path)
scenario_db = ScenarioDB.from_dict(schema, read_json(args.scenarios_path), Scenario)
valid_scenario_db = ScenarioDB.from_dict(
    schema, read_json(args.valid_scenarios_path), Scenario)

assert len(args.agent_checkpoints) <= len(args.agents)
systems = [get_system(name, args, schema, False, args.agent_checkpoints[i])
           for i, name in enumerate(args.agents)]

rl_agent = 0
system = systems[rl_agent]
model = system.env.model
critic_model = system.env.critic_model
loss = make_loss(args, model, system.mappings['tgt_vocab'])
optim = build_optim(args, model, None)
critic_optim = build_optim(args, critic_model, None)

scenarios = {
    'train': scenario_db.scenarios_list,
    'dev': valid_scenario_db.scenarios_list
}
trainer = CriticTrainer(systems, scenarios, loss, critic_optim, rl_agent,
                        reward_func=args.reward,
                        cuda=(len(args.gpuid) > 0))
trainer.learn(args)
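CriticTrainer fits the value network while the policy stays fixed, which is why only critic_optim is passed in. The sketch below shows one way such an update could look, assuming the critic maps encoded dialogue states to scalar value estimates regressed onto the observed episode reward; critic_update and its signature are assumptions, not the repo's code.

import torch
import torch.nn.functional as F

def critic_update(critic_model, critic_optim, states, episode_reward):
    # states: batch of encoded dialogue states, one per turn (assumed shape [T, d])
    values = critic_model(states).squeeze(-1)            # predicted V(s_t) per turn
    targets = torch.full_like(values, episode_reward)    # Monte-Carlo target: final episode reward
    loss = F.mse_loss(values, targets)

    critic_optim.zero_grad()
    loss.backward()
    critic_optim.step()
    return loss.item()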