class AdHoc(SARSATile):
    """SARSA agent that can ask for and give action advice under budgets.

    The agent keeps a per-state visit counter, an ask budget and an advise
    budget. Subclasses must implement check_ask, check_advise and midpoint,
    and must set informAction.
    """

    startAdvice = None
    learningEpisodes = None
    budgetAsk = 0
    budgetAdvise = 0
    spentBudgetAsk = 0
    spentBudgetAdvise = 0
    scalingVisits = math.exp(10)
    lastStatus = agent.IN_GAME
    # Enum for importance metrics
    VISIT_IMPORTANCE, Q_IMPORTANCE = range(2)
    stateImportanceMetric = None
    adviceObject = None
    # Enum for the two sides of an advice exchange
    ASK, ADVISE = range(2)
    visitTable = None
    advisedState = None
    informAction = None  # must be informed in subclass

    def __init__(self, budgetAsk, budgetAdvise, stateImportanceMetric,
                 seed=12345, port=12345, epsilon=0.1, alpha=0.1, gamma=0.9,
                 decayRate=0.9, serverPath="/home/leno/HFO/bin/"):
        """Initializes budgets, the importance metric and the bookkeeping tables.

        budgetAsk - maximum number of times this agent may receive advice
        budgetAdvise - maximum number of times this agent may give advice
        stateImportanceMetric - VISIT_IMPORTANCE or Q_IMPORTANCE
        seed, port, serverPath - forwarded to the parent constructor

        NOTE(review): epsilon, alpha, gamma and decayRate are accepted but are
        not forwarded to the parent constructor nor stored here - confirm
        whether that is intended.
        """
        super(AdHoc, self).__init__(seed=seed, port=port,
                                    serverPath=serverPath)
        self.name = "AdHoc"
        self.visitTable = {}
        self.advisedState = {}
        self.budgetAsk = budgetAsk
        self.budgetAdvise = budgetAdvise
        self.stateImportanceMetric = stateImportanceMetric
        self.startAdvice = 1
        self.learningEpisodes = 0

    def select_action(self, stateFeatures, state, noAdvice=False):
        """Selects an action, asking for advice first when it is allowed.

        Advice is only requested while exploring, with ask budget left, when
        the ABLE_KICK feature equals 1, when noAdvice is False, and after
        startAdvice learning episodes. Otherwise (or when no usable advice is
        received) the parent action selection is used.

        stateFeatures - feature vector indexed by self.ABLE_KICK
        state - the state handed to check_ask and quantize_features
        noAdvice - True to skip asking for advice
        """
        mayAsk = (self.exploring
                  and self.spentBudgetAsk < self.budgetAsk
                  and stateFeatures[self.ABLE_KICK] == 1
                  and not noAdvice
                  and self.learningEpisodes >= self.startAdvice)
        if mayAsk and self.check_ask(state):
            # In case the agent will communicate its intended action
            if self.informAction:
                normalAction = super(AdHoc, self).select_action(
                    stateFeatures, state)
            else:
                normalAction = None
            advised = self.adviceObject.ask_advice(
                self.get_Unum(), stateFeatures, normalAction)
            if advised:
                try:
                    self.advisedState[self.quantize_features(state)] = True
                    self.spentBudgetAsk = self.spentBudgetAsk + 1
                    return self.combineAdvice(advised)
                except Exception:
                    # Best effort: unusable advice must not stop the agent,
                    # but KeyboardInterrupt/SystemExit must still propagate.
                    print("Exception when combining the advice " +
                          str(advised))
            # No need to compute the intended action a second time
            if self.informAction:
                return normalAction
        # Call the default SARSA method if no action was selected
        return super(AdHoc, self).select_action(stateFeatures, state,
                                                noAdvice)

    @abc.abstractmethod
    def check_advise(self, stateFeatures, state):
        """Returns if the agent should advise in this state.

        The advised action is also returned in the positive case, i.e. the
        result is a pair (advise, advisedAction).
        """
        # Reference sketch left by the original author:
        # importance = self.state_importance(state, self.stateImportanceMetric)
        # midpoint = self.midpoint(self.ADVISE)
        # prob = self.calc_prob_adv(importance, midpoint, self.ADVISE)
        # if random.random() < prob and prob > 0.1:
        #     advisedAction = self.select_action(stateFeatures, state, True)
        #     return True, advisedAction
        # return False, None

    def combineAdvice(self, advised):
        """Returns the most frequent entry of the advised list, as an int."""
        return int(max(set(advised), key=advised.count))

    def state_importance(self, state, typeProb):
        """Calculates the state importance.

        state - the state
        typeProb - VISIT_IMPORTANCE to use only the number of visits, or
                   Q_IMPORTANCE to also use the spread of the Q-table values

        Returns 0.0 for a state never visited, or when Q_IMPORTANCE is
        requested and the spread of Q-values is infinite (for example when
        no Q-value is stored for the state). Returns None for an unknown
        typeProb.
        """
        processedState = self.quantize_features(state)
        numberVisits = self.number_visits(processedState)
        if numberVisits == 0:
            return 0.0

        visitImportance = numberVisits / (
            numberVisits + math.log(self.scalingVisits + numberVisits))

        if typeProb == self.VISIT_IMPORTANCE:
            return visitImportance

        if typeProb == self.Q_IMPORTANCE:
            candidateActions = [self.DRIBBLE, self.SHOOT, self.PASSfar,
                                self.PASSnear]
            knownQ = [self.qTable.get((processedState, act))
                      for act in candidateActions
                      if (processedState, act) in self.qTable]
            if not knownQ:
                return 0.0
            qImportance = math.fabs(max(knownQ) - min(knownQ))
            if qImportance == float('inf'):
                return 0.0
            return qImportance / (1 - visitImportance)

        # If the agent got here, it is an error
        return None

    def step(self, state, action):
        """Runs the parent step while keeping the advice bookkeeping updated.

        While exploring, the visit counter of the quantized state is
        incremented before the step. When the returned status is not IN_GAME
        the advised-state memory is cleared and, if exploring, the counter of
        learning episodes is incremented.

        Returns the (status, statePrime, actionPrime) tuple of the parent.
        """
        if self.exploring:
            processedState = self.quantize_features(state)
            self.visitTable[processedState] = self.visitTable.get(
                processedState, 0.0) + 1

        status, statePrime, actionPrime = super(AdHoc, self).step(state,
                                                                  action)
        self.lastStatus = status
        if self.lastStatus != self.IN_GAME:
            self.advisedState = {}
            if self.exploring:
                self.learningEpisodes += 1
        return status, statePrime, actionPrime

    @abc.abstractmethod
    def check_ask(self, state):
        """Returns if the agent should ask for advice in this state."""
        # Reference sketch left by the original author:
        # if self.exploring and not (self.quantize_features(state)
        #                            in self.advisedState):
        #     importance = self.state_importance(state, self.VISIT_IMPORTANCE)
        #     midpoint = self.midpoint(self.ASK)
        #     prob = self.calc_prob_adv(importance, midpoint, self.ASK)
        #     if random.random() < prob and prob > 0.1:
        #         return True
        # return False

    def calc_prob_adv(self, importance, midpoint, typeProb):
        """Calculates the probability of giving/receiving advice.

        The probability is a logistic function of (importance - midpoint)
        with steepness 10; it decreases with importance for ASK and increases
        with importance for ADVISE.

        importance - the current state importance
        midpoint - the midpoint for the logistic function
        typeProb - ASK or ADVISE
        """
        signal = 1 if typeProb == self.ASK else -1
        k = 10
        try:
            return 1 / (1 + math.exp(signal * k * (importance - midpoint)))
        except OverflowError:
            # exp() overflowed, so the logistic has saturated at zero.
            return 0.0

    def advise_action(self, uNum, state, adviseeAction=None):
        """Verifies if the agent can advise a friend, and returns the action
        if possible.

        uNum - identifier of the advisee (not used here)
        state - passed to check_advise as its stateFeatures argument; the
                result of get_transformed_features(state) is passed as its
                state argument
        adviseeAction - action the advisee intends to take, or None

        Returns the advised action, or None when the advise budget is
        exhausted, check_advise declines, or the advice equals adviseeAction.
        Budget is only spent when an action is returned.
        """
        if self.spentBudgetAdvise < self.budgetAdvise:
            # Check if the agent should advise
            advise, advisedAction = self.check_advise(
                state, self.get_transformed_features(state))
            if advise and (adviseeAction is None
                           or advisedAction != adviseeAction):
                self.spentBudgetAdvise = self.spentBudgetAdvise + 1
                return advisedAction
        return None

    def setupAdvising(self, agentIndex, allAgents):
        """Prepares advising: every agent except this one becomes an advisor.

        agentIndex - position of this agent in allAgents
        allAgents - sequence with all agents
        """
        self.adviceObject = AdviceUtil()
        advisors = [x for i, x in enumerate(allAgents) if i != agentIndex]
        self.adviceObject.setupAdvisors(advisors)

    def get_used_budget(self):
        """Returns how much of the advise budget was already spent."""
        return self.spentBudgetAdvise

    @abc.abstractmethod
    def midpoint(self, typeMid):
        """Calculates the midpoint"""
        pass

    def number_visits(self, state):
        """Returns the visit count stored for state (0.0 when never seen)."""
        return self.visitTable.get(state, 0.0)
class Torrey(SARSATile):
    """SARSA agent that asks its advisor in every eligible state and advises
    when the spread of its own Q-values exceeds a threshold, under a budget.
    """

    budget = 0
    spentBudget = 0
    lastStatus = agent.IN_GAME
    adviceObject = None
    advisedState = None
    informAction = None

    def __init__(self, budget=1000, threshold=0.01, seed=12345, port=12345,
                 serverPath="/home/leno/HFO/bin/"):
        """Initializes the advise budget and the importance threshold.

        budget - maximum number of times this agent may give advice
        threshold - minimum state importance required to give advice
        seed, port, serverPath - forwarded to the parent constructor
        """
        super(Torrey, self).__init__(seed=seed, port=port,
                                     serverPath=serverPath)
        self.name = "Torrey"
        self.advisedState = {}
        self.budget = budget
        self.threshold = threshold
        self.informAction = False

    def step(self, state, action):
        """Runs the parent step and clears the advised-state memory whenever
        the returned status is not IN_GAME.

        Returns the (status, statePrime, actionPrime) tuple of the parent.
        """
        status, statePrime, actionPrime = super(Torrey, self).step(state,
                                                                   action)
        self.lastStatus = status
        if self.lastStatus != self.IN_GAME:
            self.advisedState = {}
        return status, statePrime, actionPrime

    def select_action(self, stateFeatures, state, noAdvice=False):
        """Selects an action, asking for advice first when it is allowed.

        Advice is requested while exploring, when the ABLE_KICK feature
        equals 1, when noAdvice is False and when the quantized state is not
        yet in advisedState. Otherwise (or when no usable advice is received)
        the parent action selection is used.

        stateFeatures - feature vector indexed by self.ABLE_KICK
        state - the state handed to quantize_features
        noAdvice - True to skip asking for advice
        """
        mayAsk = (self.exploring
                  and stateFeatures[self.ABLE_KICK] == 1
                  and not noAdvice
                  and not (self.quantize_features(state)
                           in self.advisedState))
        if mayAsk:
            # Ask for advice, optionally informing the intended action
            if self.informAction:
                normalAction = super(Torrey, self).select_action(
                    stateFeatures, state, True)
            else:
                normalAction = None
            advised = self.adviceObject.ask_advice(
                self.get_Unum(), stateFeatures, normalAction)
            if advised:
                try:
                    self.advisedState[self.quantize_features(state)] = True
                    return self.combineAdvice(advised)
                except Exception:
                    # Best effort: unusable advice must not stop the agent,
                    # but KeyboardInterrupt/SystemExit must still propagate.
                    print("Exception when combining the advice " +
                          str(advised))
            # No need to compute again the intended action
            if self.informAction:
                return normalAction
        return super(Torrey, self).select_action(stateFeatures, state,
                                                 noAdvice)

    def combineAdvice(self, advised):
        """Returns the most frequent entry of the advised list, as an int."""
        return int(max(set(advised), key=advised.count))

    def advise_action(self, uNum, state, intendedAction=None):
        """Verifies if the agent can advise a friend, and returns the action
        if possible.

        uNum - identifier of the advisee (not used here)
        state - passed to check_advise as its stateFeatures argument; the
                result of get_transformed_features(state) is passed as its
                state argument
        intendedAction - action the advisee intends to take, or None

        Returns the advised action, or None when the budget is exhausted,
        check_advise declines, or the advice equals intendedAction. Budget is
        only spent when an action is returned.
        """
        if self.spentBudget < self.budget:
            # Check if the agent should advise
            advise, advisedAction = self.check_advise(
                state, self.get_transformed_features(state))
            if advise and (intendedAction is None
                           or advisedAction != intendedAction):
                self.spentBudget = self.spentBudget + 1
                return advisedAction
        return None

    def check_advise(self, stateFeatures, state):
        """Returns if the agent should advise in this state.

        The advised action is also returned in the positive case, i.e. the
        result is (True, action) when the state importance is above the
        threshold and (False, None) otherwise.
        """
        importance = self.state_importance(state)
        if importance > self.threshold:
            advisedAction = self.select_action(stateFeatures, state, True)
            return True, advisedAction
        return False, None

    def state_importance(self, state):
        """Calculates the state importance as the spread of known Q-values.

        state - the state

        Returns max(Q) - min(Q) over the actions that have a Q-table entry
        for the quantized state, or 0.0 when there is no entry at all. An
        unknown state used to evaluate to infinity, which made the agent
        spend its budget advising where it had learned nothing.
        """
        processedState = self.quantize_features(state)
        candidateActions = [self.DRIBBLE, self.SHOOT, self.PASSfar,
                            self.PASSnear]
        knownQ = [self.qTable.get((processedState, act), 0)
                  for act in candidateActions
                  if (processedState, act) in self.qTable]
        if not knownQ:
            return 0.0
        return math.fabs(max(knownQ) - min(knownQ))

    def get_used_budget(self):
        """Returns how much of the advise budget was already spent."""
        return self.spentBudget

    def setupAdvising(self, agentIndex, allAgents):
        """Prepares advising: the next agent in allAgents (wrapping around)
        becomes the single advisor.

        agentIndex - position of this agent in allAgents
        allAgents - sequence with all agents
        """
        self.adviceObject = AdviceUtil()
        # Get the next agent
        index = (agentIndex + 1) % len(allAgents)
        advisors = [allAgents[index]]
        self.adviceObject.setupAdvisors(advisors)
class Torrey(QLearning):
    """Q-learning agent that asks its advisor whenever it explores and
    advises when the spread of its own Q-values exceeds a threshold, under a
    budget.
    """

    fellowAgents = None
    spentBudget = None
    budget = None
    episodeUpdateTrace = None
    threshold = None
    adviceObject = None  # created by setup_advising

    def __init__(self, agentIndex, alpha=0.2, gamma=0.9, T=0.4, budget=350,
                 threshold=0.001):
        """Initializes the advise budget and the importance threshold.

        agentIndex, alpha, gamma, T - forwarded to the parent constructor
        budget - maximum number of times this agent may give advice
        threshold - minimum state importance required to give advice
        """
        super(Torrey, self).__init__(agentIndex, alpha=alpha, gamma=gamma,
                                     T=T)
        self.budget = budget
        self.spentBudget = 0
        self.fellowAgents = []
        self.threshold = threshold

    def setup_advising(self, agentIndex, allAgents):
        """Prepares advising: the next agent in allAgents (wrapping around)
        becomes the single advisor.

        agentIndex - position of this agent in allAgents
        allAgents - sequence with all agents
        """
        self.adviceObject = AdviceUtil()
        # Get the next agent
        index = (agentIndex + 1) % len(allAgents)
        advisors = [allAgents[index]]
        self.adviceObject.setupAdvisors(advisors)

    def get_used_budget(self):
        """Returns how much of the advise budget was already spent."""
        return self.spentBudget

    def advise_action(self, uNum, state):
        """Verifies if the agent can advise a friend, and returns the action
        if possible.

        uNum - identifier of the advisee (not used here)
        state - the state the advisee is in

        Returns the advised action, or None when the budget is exhausted or
        check_advise declines. Budget is only spent when an action is
        returned.
        """
        if self.spentBudget < self.budget:
            # Check if the agent should advise
            advise, advisedAction = self.check_advise(state)
            if advise:
                self.spentBudget = self.spentBudget + 1
                return advisedAction
        return None

    def check_advise(self, state):
        """Returns if the agent should advise in this state.

        The advised action is also returned in the positive case, i.e. the
        result is (True, action) when the state importance is above the
        threshold and (False, None) otherwise.
        """
        importance = self.state_importance(state)
        if importance > self.threshold:
            # noAdvice=True: use the own policy, do not ask someone else
            advisedAction = self.action(state, True)
            return True, advisedAction
        return False, None

    def state_importance(self, state):
        """Calculates the state importance as the absolute difference between
        the maximum and minimum Q-values reported by get_max_min_q_value for
        the four movement actions.

        state - the state
        """
        allActions = [actions.NORTH, actions.SOUTH, actions.WEST,
                      actions.EAST]
        maxQ, minQ = self.get_max_min_q_value(state, allActions)
        return math.fabs(maxQ - minQ)

    def observe_reward(self, state, action, statePrime, reward):
        """Does the necessary updates (Q-table, etc) by delegating to the
        parent class."""
        super(Torrey, self).observe_reward(state, action, statePrime, reward)

    def combineAdvice(self, advised):
        """Returns the most frequent entry of the advised list, as an int."""
        return int(max(set(advised), key=advised.count))

    def action(self, state, noAdvice=False):
        """Returns the action for the current state.

        While exploring, with noAdvice False and state[0] different from
        infinity, advice is requested first; when usable advice arrives it is
        returned. Otherwise the parent action is returned.

        state - the current state (indexable)
        noAdvice - True to skip asking for advice
        """
        # presumably state[0] == inf marks a state where asking makes no
        # sense (e.g. terminal) - TODO confirm
        if self.exploring and not noAdvice and state[0] != float('inf'):
            # Ask for advice
            advised = self.adviceObject.ask_advice(self.agentIndex, state)
            if advised:
                try:
                    return self.combineAdvice(advised)
                except Exception:
                    # Best effort: unusable advice must not stop the agent,
                    # but KeyboardInterrupt/SystemExit must still propagate.
                    print("Exception when combining the advice " +
                          str(advised))
        return super(Torrey, self).action(state, noAdvice)
class AdHoc(QLearning):
    """Q-learning agent that can ask for and give action advice under
    budgets. Subclasses must implement check_ask and check_advise.
    """

    spentBudgetAsk = None
    spentBudgetAdv = None
    budgetAsk = None
    budgetAdv = None
    visitedNumber = None
    advisedState = None
    adviceObject = None
    fellowAgents = None

    def __init__(self, agentIndex, alpha=0.2, gamma=0.9, T=0.4,
                 budgetAsk=350, budgetAdv=350):
        """Initializes budgets and the bookkeeping tables.

        agentIndex, alpha, gamma, T - forwarded to the parent constructor
        budgetAsk - stored as the ask budget
        budgetAdv - maximum number of times this agent may give advice
        """
        super(AdHoc, self).__init__(agentIndex, alpha=alpha, gamma=gamma,
                                    T=T)
        self.budgetAsk = budgetAsk
        self.budgetAdv = budgetAdv
        self.spentBudgetAsk = 0
        self.spentBudgetAdv = 0
        self.fellowAgents = []
        self.visitedNumber = {}
        self.advisedState = {}

    def setup_advising(self, agentIndex, allAgents):
        """Prepares advising: every agent except this one becomes an advisor.

        agentIndex - position of this agent in allAgents
        allAgents - sequence with all agents
        """
        self.adviceObject = AdviceUtil()
        fellows = [x for i, x in enumerate(allAgents) if i != agentIndex]
        self.adviceObject.setupAdvisors(fellows)

    def get_used_budget(self):
        """Returns how much of the advise budget was already spent."""
        return self.spentBudgetAdv

    def advise_action(self, uNum, state):
        """Verifies if the agent can advise a friend, and returns the action
        if possible.

        uNum - identifier of the advisee (not used here)
        state - the state the advisee is in

        Returns the advised action, or None when the advise budget is
        exhausted or check_advise declines. Budget is only spent when an
        action is returned.
        """
        if self.spentBudgetAdv < self.budgetAdv:
            # Check if the agent should advise
            advise, advisedAction = self.check_advise(state)
            if advise:
                self.spentBudgetAdv = self.spentBudgetAdv + 1
                return advisedAction
        return None

    @abc.abstractmethod
    def check_advise(self, state):
        """Returns if the agent should advise in this state.

        The advised action is also returned in the positive case, i.e. the
        result is a pair (advise, advisedAction).
        """
        pass

    @abc.abstractmethod
    def check_ask(self, state):
        """Returns if the agent should ask for advice in this state.

        NOTE(review): action() does not compare spentBudgetAsk against
        budgetAsk itself, so presumably implementations are expected to
        enforce the ask budget here - confirm.
        """
        pass

    def observe_reward(self, state, action, statePrime, reward):
        """Does the necessary updates (Q-table, etc) and clears the
        advised-state memory when the reward equals 1."""
        super(AdHoc, self).observe_reward(state, action, statePrime, reward)
        if reward == 1:  # terminal state
            self.advisedState = {}

    def combineAdvice(self, advised):
        """Returns the most frequent entry of the advised list, as an int."""
        return int(max(set(advised), key=advised.count))

    def action(self, state, noAdvice=False):
        """Returns the action for the current state.

        While exploring with noAdvice False, the visit counter of the state
        is incremented and, if check_ask agrees, advice is requested; when
        usable advice arrives it is returned. Otherwise the parent action is
        returned.

        state - the current state (used as a dictionary key)
        noAdvice - True to skip counting the visit and asking for advice
        """
        if self.exploring and not noAdvice:
            self.visitedNumber[state] = self.visitedNumber.get(state, 0) + 1
            if self.check_ask(state):
                # Ask for advice
                advised = self.adviceObject.ask_advice(self.agentIndex,
                                                       state)
                if advised:
                    try:
                        self.spentBudgetAsk = self.spentBudgetAsk + 1
                        chosen = self.combineAdvice(advised)
                        self.advisedState[state] = True
                        return chosen
                    except Exception:
                        # Best effort: unusable advice must not stop the
                        # agent, but KeyboardInterrupt/SystemExit must still
                        # propagate.
                        print("Exception when combining the advice " +
                              str(advised))
        return super(AdHoc, self).action(state, noAdvice)
class Torrey(QLearning):
    """Q-learning agent that asks its advisor whenever it explores and
    advises when the spread of its own Q-values exceeds a threshold, under a
    budget.
    """

    fellowAgents = None
    spentBudget = None
    budget = None
    episodeUpdateTrace = None
    threshold = None
    adviceObject = None  # created by setup_advising

    def __init__(self, agentIndex, alpha=0.2, gamma=0.9, T=0.4, budget=350,
                 threshold=0.05):
        """Initializes the advise budget and the importance threshold.

        agentIndex, alpha, gamma, T - forwarded to the parent constructor
        budget - maximum number of times this agent may give advice
        threshold - minimum state importance required to give advice
        """
        super(Torrey, self).__init__(agentIndex, alpha=alpha, gamma=gamma,
                                     T=T)
        self.budget = budget
        self.spentBudget = 0
        self.fellowAgents = []
        self.threshold = threshold

    def setup_advising(self, agentIndex, allAgents):
        """Prepares advising: the next agent in allAgents (wrapping around)
        becomes the single advisor.

        agentIndex - position of this agent in allAgents
        allAgents - sequence with all agents
        """
        self.adviceObject = AdviceUtil()
        # Get the next agent
        index = (agentIndex + 1) % len(allAgents)
        advisors = [allAgents[index]]
        self.adviceObject.setupAdvisors(advisors)

    def get_used_budget(self):
        """Returns how much of the advise budget was already spent."""
        return self.spentBudget

    def advise_action(self, uNum, state):
        """Verifies if the agent can advise a friend, and returns the action
        if possible.

        uNum - identifier of the advisee (not used here)
        state - the state the advisee is in

        Returns the advised action, or None when the budget is exhausted or
        check_advise declines. Budget is only spent when an action is
        returned.
        """
        if self.spentBudget < self.budget:
            # Check if the agent should advise
            advise, advisedAction = self.check_advise(state)
            if advise:
                self.spentBudget = self.spentBudget + 1
                return advisedAction
        return None

    def check_advise(self, state):
        """Returns if the agent should advise in this state.

        The advised action is also returned in the positive case, i.e. the
        result is (True, action) when the state importance is above the
        threshold and (False, None) otherwise.
        """
        importance = self.state_importance(state)
        if importance > self.threshold:
            # noAdvice=True: use the own policy, do not ask someone else
            advisedAction = self.action(state, True)
            return True, advisedAction
        return False, None

    def state_importance(self, state):
        """Calculates the state importance as the absolute difference between
        the maximum and minimum Q-values reported by get_max_min_q_value for
        the four movement actions.

        state - the state
        """
        allActions = [actions.NORTH, actions.SOUTH, actions.WEST,
                      actions.EAST]
        maxQ, minQ = self.get_max_min_q_value(state, allActions)
        return math.fabs(maxQ - minQ)

    def observe_reward(self, state, action, statePrime, reward):
        """Does the necessary updates (Q-table, etc) by delegating to the
        parent class."""
        super(Torrey, self).observe_reward(state, action, statePrime, reward)

    def combineAdvice(self, advised):
        """Returns the most frequent entry of the advised list, as an int."""
        return int(max(set(advised), key=advised.count))

    def action(self, state, noAdvice=False):
        """Returns the action for the current state.

        While exploring, with noAdvice False and state[0] different from
        infinity, advice is requested first; when usable advice arrives it is
        returned. Otherwise the parent action is returned.

        state - the current state (indexable)
        noAdvice - True to skip asking for advice
        """
        # presumably state[0] == inf marks a state where asking makes no
        # sense (e.g. terminal) - TODO confirm
        if self.exploring and not noAdvice and state[0] != float('inf'):
            # Ask for advice
            advised = self.adviceObject.ask_advice(self.agentIndex, state)
            if advised:
                try:
                    return self.combineAdvice(advised)
                except Exception:
                    # Best effort: unusable advice must not stop the agent,
                    # but KeyboardInterrupt/SystemExit must still propagate.
                    print("Exception when combining the advice " +
                          str(advised))
        return super(Torrey, self).action(state, noAdvice)