def debug_pb2all_converter(): pb2all = PB2AllConverter(zstat_data_src=FLAGS.zstat_data_src, dict_space=True, game_version=FLAGS.game_version, delete_dup_action='v2', sort_executors='v2', inj_larv_rule=True) pb2all.reset(replay_name=FLAGS.replay_path.split('/')[-1], player_id=FLAGS.player_id, mmr=6000, map_name=FLAGS.map_name) # reset env/process replay from the beginning run_config = run_configs.get() replay_path = path.join(FLAGS.replay_path) replay_data = run_config.replay_data(replay_path) # step each frame w. step_mul with run_config.start(version=FLAGS.game_version) as controller: replay_info = controller.replay_info(replay_data) print(replay_info) controller.start_replay( sc_pb.RequestStartReplay(replay_data=replay_data, map_data=None, options=get_replay_actor_interface( FLAGS.map_name), observed_player_id=FLAGS.player_id, disable_fog=False)) controller.step() last_pb = None last_game_info = None while True: pb_obs = controller.observe() game_info = controller.game_info() if last_pb is None: last_pb = pb_obs last_game_info = game_info continue if pb_obs.player_result: # episode end, the zstat to this extent is what we need break # pb2all data = pb2all.convert(pb=(last_pb, last_game_info), next_pb=(pb_obs, game_info)) last_pb = pb_obs last_game_info = game_info # step the replay controller.step(1) # step_mul
def debug_pb2all_converter(): # reset env/process replay from the beginning run_config = run_configs.get() replay_path = path.join(FLAGS.replay_dir, FLAGS.replay_name + '.SC2Replay') replay_data = run_config.replay_data(replay_path) game_core_config = { 'show_placeholders': True, 'show_burrowed_shadows': True } # step each frame w. step_mul with run_config.start(version=FLAGS.game_version) as controller: replay_info = controller.replay_info(replay_data) #print(replay_info) # ***for debugging, VERY dangerous!!*** map_name = replay_info.map_name #map_name = 'Stasis' pb2all = PB2AllConverter( zstat_data_src=FLAGS.zstat_data_src, input_map_size=(128, 128), output_map_size=(128, 128), dict_space=True, game_version=FLAGS.game_version, zmaker_version='v5', zstat_zeroing_prob=0.0, max_bo_count=50, max_bobt_count=20, delete_dup_action='v2', sort_executors='v2', inj_larv_rule=True, add_lurker_spine_to_units=True, add_cargo_to_units=True, use_display_type=True, distinguish_effect_camp=True, lurker_effect_decay=0.999, ) pb2all.reset(replay_name=FLAGS.replay_name, player_id=FLAGS.player_id, mmr=6000, map_name=map_name) controller.start_replay( sc_pb.RequestStartReplay(replay_data=replay_data, map_data=None, options=tleague_rep_actor_interface( map_name, game_core_config=game_core_config), observed_player_id=FLAGS.player_id, disable_fog=False)) controller.step() last_pb = None last_game_info = None step = 0 while True: #print(step) pb_obs = controller.observe() game_info = controller.game_info() if last_pb is None: last_pb = pb_obs last_game_info = game_info continue if pb_obs.player_result: # episode end, the zstat to this extent is what we need break # pb2all data = pb2all.convert(pb=(last_pb, last_game_info), next_pb=(pb_obs, game_info)) last_pb = pb_obs last_game_info = game_info # printing if data: # note: data is [((obs, act), w)] act = data[0][0][1] ab_idx = act['A_AB'] print(', '.join([ FLAGS.prefix, 'gal:{}'.format(pb_obs.observation.game_loop), # game loop 'abn:{}'.format(ZERG_ABILITIES[ab_idx][0]), # ability name ])) # step the replay controller.step(1) # step_mul step += 1
def debug_pb2all_converter(): # reset env/process replay from the beginning run_config = run_configs.get() replay_path = path.join(FLAGS.replay_dir, FLAGS.replay_name + '.SC2Replay') replay_data = run_config.replay_data(replay_path) # step each frame w. step_mul with run_config.start(version=FLAGS.game_version) as controller: replay_info = controller.replay_info(replay_data) #print(replay_info) # ***for debugging, VERY dangerous!!*** map_name = replay_info.map_name #map_name = 'Stasis' pb2all = PB2AllConverter( zstat_data_src=FLAGS.zstat_data_src, input_map_size=(128, 128), output_map_size=(128, 128), dict_space=True, game_version=FLAGS.game_version, zmaker_version='v5', zstat_zeroing_prob=0.0, max_bo_count=50, max_bobt_count=20, sort_executors=True ) pb2all.reset( replay_name=FLAGS.replay_name, player_id=FLAGS.player_id, mmr=6000, map_name=map_name ) controller.start_replay(sc_pb.RequestStartReplay( replay_data=replay_data, map_data=None, options=get_replay_actor_interface(map_name), observed_player_id=FLAGS.player_id, disable_fog=False)) controller.step() last_pb = None last_game_info = None step = 0 while True: #print(step) pb_obs = controller.observe() game_info = controller.game_info() if last_pb is None: last_pb = pb_obs last_game_info = game_info continue if pb_obs.player_result: # episode end, the zstat to this extent is what we need break # pb2all data = pb2all.convert( pb=(last_pb, last_game_info), next_pb=(pb_obs, game_info)) last_pb = pb_obs last_game_info = game_info # step the replay controller.step(1) # step_mul step += 1
def mnet_v6d6_run_ppo2_loss_test(): mycfg = { 'test': False, 'use_self_fed_heads': False, 'use_value_head': True, 'use_loss_type': 'rl_ppo2', 'use_lstm': True, 'lstm_cell_type': 'lstm', 'lstm_layer_norm': True, 'batch_size': 8, 'rollout_len': 4, 'lstm_duration': 1, 'nlstm': 256, 'hs_len': 256 * 2, 'weight_decay': 0.0005, 'arg_scope_type': 'mnet_v6_type_a', 'use_base_mask': True, 'vec_embed_version': 'v3', 'last_act_embed_version': 'v2', 'zstat_embed_version': 'v4d1', 'zstat_index_base_wavelen': 555.6, 'trans_version': 'v4', 'use_astar_glu': True, 'use_astar_func_embed': True, 'n_v': 5, 'lam': 0.8, 'gather_batch': True, 'merge_pi': False, 'distillation': True, 'value_net_version': 'trans_v1', } converter = PB2AllConverter(dict_space=True, zmaker_version='v5', zstat_data_src=FLAGS.zstat_data_src, game_version=FLAGS.game_version, sort_executors='v1') ob_space, ac_space = converter.space.spaces # build the net nc = net_config_cls(ob_space, ac_space, **mycfg) nc.reward_weights = np.ones(shape=nc.reward_weights_shape, dtype=np.float32) inputs = net_inputs_placeholders_fun(nc) keys = list(inputs.X.keys()) for key in keys: inputs.X['OPPO_' + key] = inputs.X[key] out = net_build_fun(inputs, nc, scope='mnet_v6d6_rl_ppo2_loss') converter.reset(replay_name=FLAGS.replay_name, player_id=FLAGS.player_id, mmr=6000, map_name=FLAGS.map_name) run_config = run_configs.get() replay_data = run_config.replay_data( path.join(FLAGS.replay_dir, FLAGS.replay_name + '.SC2Replay')) with run_config.start(version=FLAGS.game_version) as controller: replay_info = controller.replay_info(replay_data) print(replay_info) controller.start_replay( sc_pb.RequestStartReplay(replay_data=replay_data, map_data=None, options=get_replay_actor_interface( FLAGS.map_name), observed_player_id=FLAGS.player_id, disable_fog=False)) controller.step() last_pb = None last_game_info = None sess = tf.Session() tf.global_variables_initializer().run(session=sess) while True: pb_obs = controller.observe() game_info = controller.game_info() if last_pb is None: last_pb = pb_obs last_game_info = game_info continue if pb_obs.player_result: # episode end, the zstat to this extent is what we need break # pb2all: X, A, weights as data data = converter.convert(pb=(last_pb, last_game_info), next_pb=(pb_obs, game_info)) if data: feed_dict = {} X, A = data[0][0] for key in X: feed_dict[inputs.X[key]] = [X[key]] * mycfg['batch_size'] # if 'OPPO_'+key in inputs.X: # feed_dict[inputs.X['OPPO_'+key]] = [X[key]] * mycfg['batch_size'] for key in A: feed_dict[inputs.A[key]] = [A[key]] * mycfg['batch_size'] feed_dict[inputs.neglogp[key]] = [0] * mycfg['batch_size'] feed_dict[inputs.logits[key]] = np.zeros( shape=inputs.logits[key].shape) feed_dict[inputs.r] = [[0] * nc.n_v] * mycfg['batch_size'] feed_dict[inputs.discount] = [1] * mycfg['batch_size'] feed_dict[inputs.S] = [ np.array([0] * mycfg['hs_len'], dtype=np.float32) ] * mycfg['batch_size'] feed_dict[inputs.M] = [np.array(1, dtype=np.bool) ] * mycfg['batch_size'] loss = sess.run([ out.loss.pg_loss, out.loss.value_loss, out.loss.entropy_loss, out.loss.loss_endpoints ], feed_dict) print('Loss endpoints: {}'.format(loss)) # update and step the replay last_pb = pb_obs last_game_info = game_info controller.step(1) # step_mul
def mnet_v6d6_run_il_loss_test(): mycfg = { 'test': False, 'use_self_fed_heads': False, 'use_value_head': False, 'use_loss_type': 'il', 'il_multi_label_loss': True, 'use_lstm': True, 'lstm_cell_type': 'lstm', 'lstm_layer_norm': True, 'batch_size': 2, 'rollout_len': 1, 'lstm_duration': 1, 'nlstm': 256, 'hs_len': 256 * 2, 'weight_decay': 0.0005, 'arg_scope_type': 'mnet_v6_type_a', 'use_base_mask': True, 'vec_embed_version': 'v2d1', 'last_act_embed_version': 'v2', 'zstat_embed_version': 'v7', 'trans_version': 'v5', 'gather_batch': True, 'use_astar_glu': True, 'use_astar_func_embed': True, 'pos_logits_mode': '3x3up2', 'pos_n_blk': 3, 'pos_n_skip': 2, } converter = PB2AllConverter(dict_space=True, zmaker_version='v5', zstat_data_src=FLAGS.zstat_data_src, game_version=FLAGS.game_version, sort_executors='v1', delete_dup_action='v2', input_map_size=(128, 128), output_map_size=(256, 256)) ob_space, ac_space = converter.space.spaces # build the net nc = net_config_cls(ob_space, ac_space, **mycfg) inputs = net_inputs_placeholders_fun(nc) out = net_build_fun(inputs, nc, scope='mnet_v6d6_il_loss') print('Successfully created the net.') converter.reset(replay_name=FLAGS.replay_name, player_id=FLAGS.player_id, mmr=6000, map_name=FLAGS.map_name) run_config = run_configs.get() replay_data = run_config.replay_data( path.join(FLAGS.replay_dir, FLAGS.replay_name + '.SC2Replay')) with run_config.start(version=FLAGS.game_version) as controller: replay_info = controller.replay_info(replay_data) print(replay_info) controller.start_replay( sc_pb.RequestStartReplay(replay_data=replay_data, map_data=None, options=get_replay_actor_interface( FLAGS.map_name), observed_player_id=FLAGS.player_id, disable_fog=False)) controller.step() last_pb = None last_game_info = None sess = tf.Session() tf.global_variables_initializer().run(session=sess) for _ in range(1000): pb_obs = controller.observe() game_info = controller.game_info() if last_pb is None: last_pb = pb_obs last_game_info = game_info continue if pb_obs.player_result: # episode ends, the zstat to this extent is what we need break # pb2all: X, A, weight as data data = converter.convert(pb=(last_pb, last_game_info), next_pb=(pb_obs, game_info)) if len(data) > 0: X, A = data[0][0] feed_dict = {} for key in X: feed_dict[inputs.X[key]] = [X[key]] * mycfg['batch_size'] for key in A: feed_dict[inputs.A[key]] = [A[key]] * mycfg['batch_size'] feed_dict[inputs.S] = ( [np.array([0] * mycfg['hs_len'], dtype=np.float32)] * mycfg['batch_size']) feed_dict[inputs.M] = ([np.array(1, dtype=np.bool)] * mycfg['batch_size']) loss = sess.run(out.loss.loss_endpoints, feed_dict) ab = A['A_AB'] avail_actions = np.nonzero(X['MASK_AB'])[0] avail_selections = np.nonzero(X['MASK_SELECTION'][ab])[0] selection_units = A['A_SELECT'] print('Avail action num: {}, select {}'.format( len(avail_actions), ZERG_ABILITIES[ab][0])) # print(ZERG_ABILITIES[ab][0]) print('Avail unit num: {}, select {}'.format( len(avail_selections), selection_units[:sum([(i != 600) for i in selection_units])])) print('Loss endpoints: {}'.format(loss)) # update & step the replay last_pb = pb_obs last_game_info = game_info controller.step(1) # step_mul controller.quit()
from tpolicies.net_zoo.mnet_v6.mnet_v6d6 import net_inputs_placeholders_fun from tpolicies.net_zoo.mnet_v6.mnet_v6d6 import net_build_fun FLAGS = flags.FLAGS flags.DEFINE_string("zstat_data_src", '/Users/jcxiong/SC2/rp1522-mmr-ge6000-winner-selected-8', "") flags.DEFINE_string("game_version", '4.10.0', "") flags.DEFINE_string("replay_name", 'temp', "") flags.DEFINE_string("replay_dir", '/Users/jcxiong/SC2/rp1522-mmr-ge6000-winner-selected-8/', "") flags.DEFINE_string("map_name", 'KairosJunction', "") flags.DEFINE_integer("player_id", 2, "") converter = PB2AllConverter(dict_space=True, zmaker_version='v5') ob_space, ac_space = converter.space.spaces def mnet_v6d6_inputs_test(): mycfg = { 'use_self_fed_heads': False, 'use_lstm': True, 'batch_size': 32, 'rollout_len': 1, 'lstm_duration': 4, 'nlstm': 256, 'hs_len': 256 * 2 + 1, 'lstm_cell_type': 'k_lstm' }