def __init__(self, weight_path):
    super().__init__()
    agent_names = ['agent%d' % i for i in range(1, 4)]
    model = Model(agent_names, (1000, 21, 256 + 256 * 2 + 120), 'Double', (1000, 21), 0.99)
    # one Q-value predictor per agent, all restored from the same checkpoint
    self.predictors = {n: Predictor(OfflinePredictor(PredictConfig(
        model=model,
        session_init=SaverRestore(weight_path),
        input_names=[n + '/state', n + '_comb_mask', n + '/fine_mask'],
        output_names=[n + '/Qvalue'])),
        num_actions=(1000, 21)) for n in self.get_all_agent_names()}
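For reference, the OfflinePredictor wrapped above is a plain tensorpack callable: positional arguments follow input_names order and the return value is a list matching output_names. Below is a minimal sketch of querying one agent's Q-values directly, without the project's Predictor wrapper; the state, comb_mask and fine_mask arrays are assumed to already be shaped to match the Model's placeholders, and Model and weight_path come from this repository.

# sketch only: `state`, `comb_mask`, `fine_mask` are assumed numpy inputs
# matching the Model's placeholders for agent1
pred_fn = OfflinePredictor(PredictConfig(
    model=model,
    session_init=SaverRestore(weight_path),
    input_names=['agent1/state', 'agent1_comb_mask', 'agent1/fine_mask'],
    output_names=['agent1/Qvalue']))
# tensorpack predictors are callables: inputs follow input_names order,
# outputs come back as a list matching output_names
q_values, = pred_fn(state, comb_mask, fine_mask)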
def get_config():
    agent_names = ['agent%d' % i for i in range(1, 4)]
    model = Model(agent_names, STATE_SHAPE, METHOD, NUM_ACTIONS, GAMMA)
    exps = [
        ExpReplay(
            # model=model,
            agent_name=name,
            player=Env(agent_names),
            state_shape=STATE_SHAPE,
            num_actions=[MAX_NUM_COMBS, MAX_NUM_GROUPS],
            batch_size=BATCH_SIZE,
            memory_size=MEMORY_SIZE,
            init_memory_size=INIT_MEMORY_SIZE,
            init_exploration=1.,
            update_frequency=UPDATE_FREQ) for name in agent_names
    ]
    df = MyDataFLow(exps)
    bl_evaluators = [
        BLEvaluator(EVAL_EPISODE, agent_names[0], 2, lambda: CEnv()),
        BLEvaluator(EVAL_EPISODE, agent_names[1], 3, lambda: CEnv()),
        BLEvaluator(EVAL_EPISODE, agent_names[2], 1, lambda: CEnv())
    ]
    return AutoResumeTrainConfig(
        # always_resume=False,
        data=QueueInput(df),
        model=model,
        callbacks=[
            ModelSaver(),
            PeriodicTrigger(
                RunOp(model.update_target_param, verbose=True),
                every_k_steps=STEPS_PER_EPOCH // 10),  # update target network every 10k steps
            *exps,
            # ScheduledHyperParamSetter('learning_rate',
            #                           [(60, 5e-5), (100, 2e-5)]),
            *[
                ScheduledHyperParamSetter(
                    ObjAttrParam(exp, 'exploration'),
                    [(0, 1), (30, 0.5), (100, 0.3), (320, 0.1)],  # 1->0.1 in the first million steps
                    interp='linear') for exp in exps
            ],
            *bl_evaluators,
            Evaluator(EVAL_EPISODE, agent_names, lambda: Env(agent_names)),
            HumanHyperParamSetter('learning_rate'),
        ],
        # session_init=ChainInit([SaverRestore('../Hierarchical_Q/train_log/DQN-9-3-LASTCARDS/model-240000', 'agent1'),
        #                         SaverRestore('./train_log/DQN-60-MA/model-355000')]),
        # starting_epoch=0,
        # session_init=SaverRestore('train_log/DQN-54-AUG-STATE/model-75000'),
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )
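The exploration schedule passed to ScheduledHyperParamSetter above is a list of (epoch, value) anchor points; with interp='linear', tensorpack updates the parameter once per epoch by interpolating linearly between the surrounding anchors. The following self-contained sketch reproduces that interpolation with the anchor values copied from the config above (illustration only, not the library's implementation).

# (epoch, exploration) anchors copied from the config above
SCHEDULE = [(0, 1.0), (30, 0.5), (100, 0.3), (320, 0.1)]

def exploration_at(epoch, schedule=SCHEDULE):
    # piecewise-linear interpolation between anchors, clamped outside the range
    if epoch <= schedule[0][0]:
        return schedule[0][1]
    if epoch >= schedule[-1][0]:
        return schedule[-1][1]
    for (e0, v0), (e1, v1) in zip(schedule, schedule[1:]):
        if e0 <= epoch <= e1:
            return v0 + (epoch - e0) / (e1 - e0) * (v1 - v0)

print(exploration_at(15))   # 0.75 -- halfway between epochs 0 and 30
print(exploration_at(210))  # 0.2  -- halfway between epochs 100 and 320
print(exploration_at(500))  # 0.1  -- clamped after epoch 320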
def get_config():
    model = Model(agent_names, STATE_SHAPE, METHOD, NUM_ACTIONS, GAMMA)
    exps = [
        ExpReplay(
            agent_name=name,
            state_shape=STATE_SHAPE,
            num_actions=[MAX_NUM_COMBS, MAX_NUM_GROUPS],
            batch_size=BATCH_SIZE,
            memory_size=MEMORY_SIZE,
            init_memory_size=INIT_MEMORY_SIZE,
            init_exploration=1.,
            update_frequency=UPDATE_FREQ,
            pipe_exp2sim=name_exp2sim + str(i),
            pipe_sim2exp=name_sim2exp + str(i)) for i, name in enumerate(agent_names)
    ]
    df = MyDataFLow(exps)
    return AutoResumeTrainConfig(
        # always_resume=False,
        data=QueueInput(df),
        model=model,
        callbacks=[
            ModelSaver(),
            PeriodicTrigger(
                RunOp(model.update_target_param, verbose=True),
                every_k_steps=STEPS_PER_EPOCH // 10),  # update target network every 10k steps
            # the following order is important
            coordinator,
            manager,
            *exps,
            # ScheduledHyperParamSetter('learning_rate',
            #                           [(60, 5e-5), (100, 2e-5)]),
            *[
                ScheduledHyperParamSetter(
                    ObjAttrParam(sim, 'exploration'),
                    [(0, 1), (30, 0.5), (100, 0.3), (320, 0.1)],  # 1->0.1 in the first million steps
                    interp='linear') for sim in sims
            ],
            # Evaluator(EVAL_EPISODE, agent_names, lambda: Env(agent_names)),
            HumanHyperParamSetter('learning_rate'),
        ],
        session_init=ChainInit([
            SaverRestore(
                '../TensorPack/MA_Hierarchical_Q/train_log/DQN-60-MA/model-355000')
        ]),
        # starting_epoch=0,
        # session_init=SaverRestore('train_log/DQN-54-AUG-STATE/model-75000'),
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )
def __init__(self, role_id, weight_path):
    def role2agent(role):
        # map the environment role id to the corresponding agent's variable scope
        if role == 2:
            return 'agent1'
        elif role == 1:
            return 'agent3'
        else:
            return 'agent2'
    super().__init__(role_id)
    agent_names = ['agent%d' % i for i in range(1, 4)]
    model = Model(agent_names, (1000, 21, 256 + 256 * 2 + 120), 'Double', (1000, 21), 0.99)
    self.predictor = Predictor(OfflinePredictor(PredictConfig(
        model=model,
        session_init=SaverRestore(weight_path),
        input_names=[role2agent(role_id) + '/state',
                     role2agent(role_id) + '_comb_mask',
                     role2agent(role_id) + '/fine_mask'],
        output_names=[role2agent(role_id) + '/Qvalue'])),
        num_actions=(1000, 21))
parser.add_argument('--task', help='task to perform',
                    choices=['play', 'eval', 'train'], default='train')
parser.add_argument('--algo', help='algorithm',
                    choices=['DQN', 'Double', 'Dueling'], default='Double')
args = parser.parse_args()

if args.gpu:
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
METHOD = args.algo
# set num_actions
NUM_ACTIONS = max(MAX_NUM_GROUPS, MAX_NUM_COMBS)

nr_gpu = get_nr_gpu()
train_tower = list(range(nr_gpu))
if args.task != 'train':
    assert args.load is not None
    pred = OfflinePredictor(PredictConfig(
        model=Model(),
        session_init=get_model_loader(args.load),
        input_names=['state', 'comb_mask'],
        output_names=['Qvalue']))
else:
    logger.set_logger_dir(
        os.path.join('train_log', 'DQN-60-MA-SELF_PLAY'))
    config = get_config()
    if args.load:
        config.session_init = get_model_loader(args.load)
    trainer = SimpleTrainer() if nr_gpu == 1 else AsyncMultiGPUTrainer(train_tower)
    launch_train_with_config(config, trainer)