Code example #1
File: draftstate.py  Project: systats/swainBot
    def __init__(self, team, champ_ids = get_champion_ids(), num_positions = 5, draft = Draft('default')):
        #TODO (Devin): This should make sure that num_champions >= num_positions
        self.num_champions = len(champ_ids)
        self.num_positions = num_positions
        self.num_actions = (self.num_positions+1)*self.num_champions
        self.state_index_to_champ_id = {i:k for i,k in zip(range(self.num_champions),champ_ids)}
        self.champ_id_to_state_index = {k:i for i,k in zip(range(self.num_champions),champ_ids)}
        self.state = np.zeros((self.num_champions, self.num_positions+2), dtype=bool)
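        # Columns of the state matrix: column 0 holds picks whose position is
        # masked (position 0), column 1 holds bans (position -1), and columns
        # 2..num_positions+1 hold this team's picks by position (see the
        # position-index mapping built below).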
        self.picks = []
        self.bans = []
        self.selected_pos = []

        self.team = team
        self.draft_structure = draft
        # Get phase information from draft
        self.BAN_PHASE_LENGTHS = self.draft_structure.PHASE_LENGTHS[DraftState.BAN_PHASE]
        self.PICK_PHASE_LENGTHS = self.draft_structure.PHASE_LENGTHS[DraftState.PICK_PHASE]

        # The dicts pos_to_pos_index and pos_index_to_pos map position labels
        # to column indices into the state matrix and vice versa.
        self.positions = [i-1 for i in range(num_positions+2)]
        self.pos_indices = [1,0]
        self.pos_indices.extend(range(2,num_positions+2))
        self.pos_to_pos_index = dict(zip(self.positions,self.pos_indices))
        self.pos_index_to_pos = dict(zip(self.pos_indices,self.positions))
Code example #2
def dueling_networks(path_to_model):
    valid_champ_ids = cinfo.get_champion_ids()
    # Two states are maintained: one corresponding to the perception of the draft
    # according to each of the teams.
    blue_state = DraftState(DraftState.BLUE_TEAM, valid_champ_ids)
    red_state = DraftState(DraftState.RED_TEAM, valid_champ_ids)
    draft = {0: blue_state, 1: red_state}
    with tf.Session() as sess:
        saver = tf.train.import_meta_graph(
            "{path}.ckpt.meta".format(path=path_to_model))
        saver.restore(sess, "{path}.ckpt".format(path=path_to_model))
        online_out = tf.get_default_graph().get_tensor_by_name(
            "online/outputs:0")
        online_pred = tf.get_default_graph().get_tensor_by_name(
            "online/prediction:0")
        online_input = tf.get_default_graph().get_tensor_by_name(
            "online/inputs:0")
        online_secondary_input = tf.get_default_graph().get_tensor_by_name(
            "online/secondary_inputs:0")

        submission_count = 0
        while (blue_state.evaluate() != DraftState.DRAFT_COMPLETE
               and red_state.evaluate() != DraftState.DRAFT_COMPLETE):
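            # get_active_team is assumed to return the draft-dict key of the team
            # submitting next: 0 for the blue-side state, 1 for the red-side state.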
            active_team = get_active_team(submission_count)
            inactive_team = 0 if active_team else 1
            print("active {}".format(active_team))
            state = draft[active_team]
            pred_act = sess.run(online_pred,
                                feed_dict={
                                    online_input: [state.format_state()],
                                    online_secondary_input:
                                    [state.format_secondary_inputs()]
                                })
            cid, pos = state.format_action(pred_act[0])
            print("cid={} pos={}".format(cid, pos))
            # Update active state
            state.update(cid, pos)
            # Update inactive state, remembering to mask non-bans submitted by opponent
            inactive_pos = pos if pos == -1 else 0
            draft[inactive_team].update(cid, inactive_pos)
            submission_count += 1

    return draft
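
A minimal usage sketch for dueling_networks, assuming a trained checkpoint saved with the "{path}.ckpt" naming used above; the checkpoint prefix and the printed fields are illustrative, not part of the original example:

# Hypothetical driver: replay one full self-drafted game from a saved checkpoint.
final_draft = dueling_networks("saved_models/my_model")  # checkpoint prefix is an assumption
blue_state, red_state = final_draft[0], final_draft[1]
print("Blue-side evaluation: {}".format(blue_state.evaluate()))
print("Red-side evaluation: {}".format(red_state.evaluate()))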
Code example #3
File: match_processing.py  Project: systats/swainBot
def process_match(match, team, augment_data=True):
    """
    process_match takes an input match and breaks each incremental pick and ban in the draft into an experience (aka a "memory").

    Args:
        match (dict): match dictionary with pick and ban data for a single game.
        team (DraftState.BLUE_TEAM or DraftState.RED_TEAM): The team perspective used to process the match.
            The selected team has the position for each of its picks explicitly included with the experience, while the
            "opposing" team has the assigned positions for its champion picks masked.
        augment_data (bool, optional): flag controlling randomized reordering of submissions whose order does not affect the draft as a whole.
    Returns:
        experiences ( list(tuple) ): list of experience tuples. Each experience is of the form (s, a, r, s') where:
            - s and s' are DraftState states before and after a single action
            - a is the (stateIndex, position) tuple for the selected champion to be banned or picked; position = 0 for submissions
                by the opposing team
            - r is the integer reward obtained from submitting the action a

    process_match() can take the vantage from both sides of the draft to parse for memories. This means we can ultimately sample from
    both winning drafts (positive reinforcement) and losing drafts (negative reinforcement) when training.
    """
    experiences = []
    valid_champ_ids = get_champion_ids()

    # This section controls data augmentation of the match. Certain submissions in the draft are
    # submitted consecutively by the same team during the same phase (i.e. team1 pick0 -> team1 pick1).
    # Although these submissions were produced in a particular order, from a draft perspective
    # there is no difference between submissions of the form
    # team1 pick0 -> team1 pick1 vs team1 pick1 -> team1 pick0,
    # provided that the two picks are from the same phase (both bans or both picks).
    # Therefore it is possible to augment the order in which these submissions are processed.

    # Note that we can also augment the banning phase if desired. Although these submissions technically
    # fall outside of the conditions listed above, in practice bans made in the same phase are
    # interchangeable in order.

    # Build queue of actions from match reference (augmenting if desired)
    augments_list = [
        ("blue","bans",slice(0,3)), # Blue bans 0,1,2 are augmentable
        ("blue","bans",slice(3,5)), # Blue bans 3,4 are augmentable
        ("red","bans",slice(0,3)),
        ("red","bans",slice(3,5)),
        ("blue","picks",slice(1,3)), # Blue picks 1,2 are augmentable
        ("blue","picks",slice(3,5)), # Blue picks 3,4 are augmentable
        ("red","picks",slice(0,2)) # Red picks 0,1 are augmentable
    ]
    if augment_data:
        augmented_match = deepcopy(match) # Deepcopy match to avoid side effects
        for aug in augments_list:
            (k1,k2,aug_range) = aug
            count = len(augmented_match[k1][k2][aug_range])
            augmented_match[k1][k2][aug_range] = random.sample(augmented_match[k1][k2][aug_range],count)

        action_queue = build_action_queue(augmented_match)
    else:
        action_queue = build_action_queue(match)

    # Set up draft state
    draft = DraftState(team,valid_champ_ids)

    finish_memory = False
    while action_queue:
        # Get next pick from deque
        submission = action_queue.popleft()
        (submitting_team, pick, position) = submission

        # There are two conditions under which we want to finalize a memory:
        # 1. Non-designated team has finished submitting picks for this phase (ie next submission belongs to the designated team)
        # 2. Draft is complete (no further picks in the draft)
        if submitting_team == team:
            if finish_memory:
                # This is case 1 to store memory
                r = get_reward(draft, match, a, a)
                s_next = deepcopy(draft)
                memory = (s, a, r, s_next)
                experiences.append(memory)
                finish_memory = False
            # Memory starts when upcoming pick belongs to designated team
            s = deepcopy(draft)
            # Store action = (champIndex, pos)
            a = (pick, position)
            finish_memory = True
        else:
            # Mask positions for pick submissions belonging to the non-designated team
            if position != -1:
                position = 0

        draft.update(pick, position)

    # Once the queue is empty, store last memory. This is case 2 above.
    # There is always an outstanding memory at the completion of the draft.
    # RED_TEAM always gets last pick. Therefore:
    #   if team = BLUE_TEAM -> There is an outstanding memory from last RED_TEAM submission
    #   if team = RED_TEAM -> Memory is open from just before our last submission
    if draft.evaluate() == DraftState.DRAFT_COMPLETE:
        assert finish_memory
        r = get_reward(draft, match, a, a)
        s_next = deepcopy(draft)
        memory = (s, a, r, s_next)
        experiences.append(memory)
    else:
        print("{} vs {}".format(match["blue_team"],match["red_team"]))
        draft.display()
        print("Error code {}".format(draft.evaluate()))
        print("Number of experiences {}".format(len(experiences)))
        for experience in experiences:
            _,a,_,_ = experience
            print(a)
        print("")#raise

    return experiences
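
A short sketch of how the experiences produced by process_match might be collected; it assumes a match dictionary of the shape described in the docstring and simply gathers memories from both team perspectives:

# Hypothetical driver: gather memories from one match, from both sides of the draft.
def gather_experiences(match):
    all_experiences = []
    for team in (DraftState.BLUE_TEAM, DraftState.RED_TEAM):
        all_experiences.extend(process_match(match, team, augment_data=True))
    return all_experiences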
Code example #4
File: main.py  Project: systats/swainBot
from draftstate import DraftState
import champion_info as cinfo
import match_processing as mp

from models import qNetwork, softmax
from trainer import DDQNTrainer, SoftmaxTrainer
from models.inference_model import QNetInferenceModel, SoftmaxInferenceModel

import tensorflow as tf
import json

print("")
print("********************************")
print("** Beginning Swain Bot Run! **")
print("********************************")

valid_champ_ids = cinfo.get_champion_ids()
print("Number of valid championIds: {}".format(len(valid_champ_ids)))

# Store training match data in a json file (for reuse later)
reuse_matches = True
val_count = 40
save_match_pool = False

validation_ids = []
training_ids = []
if reuse_matches:
    print("Using match data in match_pool.txt.")
    with open('match_pool.txt', 'r') as infile:
        data = json.load(infile)
    validation_ids = data["validation_ids"]
    training_ids = data["training_ids"]
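
For reference, the snippet above expects match_pool.txt to contain a JSON object with at least the two keys read here. A minimal sketch of writing a compatible file (the match ids are placeholders, not real data):

# Hypothetical sketch: write a match pool file that the loading code above can read.
pool = {"validation_ids": [101, 102], "training_ids": [201, 202, 203]}
with open('match_pool.txt', 'w') as outfile:
    json.dump(pool, outfile)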
Code example #5
def self_train(sess, explore_prob, n_experiences=1):
    """
    Runs the model currently held in TF Session sess through one self-training loop. Returns
    negative memories if the model fails to complete a draft.
    Args:
        sess (tf.Session()): TF Session used to run model.
        explore_prob (float): Probability that each pick will explore state space by submitting a random action
        n_experiences (int): Number of experiences desired.
    Returns:
        experiences [(s,a,r,s')]: list of experience tuples from illegal submissions made by either side of the draft.
            Empty if the network completes the drafts without any illegal actions.
    """
    MAX_DRAFT_ITERATIONS = 100  # Maximum number of drafts to iterate through
    assert n_experiences > 0, "Number of experiences must be positive"
    valid_champ_ids = cinfo.get_champion_ids()
    match = {"winner": None}  # Blank match for rewards processing
    # Two states are maintained: one corresponding to the perception of the draft
    # according to each of the teams.
    blue_state = DraftState(DraftState.BLUE_TEAM, valid_champ_ids)
    red_state = DraftState(DraftState.RED_TEAM, valid_champ_ids)
    # Draft dictionary holds states for each perspective
    draft = {0: blue_state, 1: red_state}

    online_pred = tf.get_default_graph().get_tensor_by_name(
        "online/prediction:0")
    online_input = tf.get_default_graph().get_tensor_by_name("online/inputs:0")
    online_secondary_input = tf.get_default_graph().get_tensor_by_name(
        "online/secondary_inputs:0")

    experiences = []
    successful_draft_count = 0
    while (len(experiences) < n_experiences):
        if (successful_draft_count > MAX_DRAFT_ITERATIONS):
            break
        blue_state.reset()
        red_state.reset()
        submission_count = 0
        while (blue_state.evaluate() != DraftState.DRAFT_COMPLETE
               and red_state.evaluate() != DraftState.DRAFT_COMPLETE):
            active_team = get_active_team(submission_count)
            inactive_team = 0 if active_team else 1

            state = draft[active_team]
            start = deepcopy(state)

            if (random.random() < explore_prob):
                # Explore state space by submitting random action
                pred_act = [random.randint(0, state.num_actions - 1)]
            else:
                pred_act = sess.run(online_pred,
                                    feed_dict={
                                        online_input: [state.format_state()],
                                        online_secondary_input:
                                        [state.format_secondary_inputs()]
                                    })
            action = state.format_action(pred_act[0])
            if (state.is_submission_legal(*action)):
                # Update active state
                state.update(*action)
                # Update inactive state, remembering to mask non-bans submitted by opponent
                (cid, pos) = action
                inactive_pos = pos if pos == -1 else 0
                draft[inactive_team].update(cid, inactive_pos)
                submission_count += 1
            else:
                bad_state = deepcopy(state)
                bad_state.update(*action)
                experiences.append(
                    (start, action, get_reward(bad_state, match, action,
                                               None), bad_state))
                break
        successful_draft_count += 1
    return experiences
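
A rough sketch of how self_train might be driven, mirroring the checkpoint-loading pattern from code example #2; the checkpoint prefix and exploration probability are assumptions:

# Hypothetical driver: restore the online network and harvest negative memories from self-play.
path_to_model = "saved_models/my_model"  # assumed checkpoint prefix
with tf.Session() as sess:
    saver = tf.train.import_meta_graph("{path}.ckpt.meta".format(path=path_to_model))
    saver.restore(sess, "{path}.ckpt".format(path=path_to_model))
    bad_memories = self_train(sess, explore_prob=0.1, n_experiences=16)
print("Collected {} negative memories".format(len(bad_memories)))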