def update():
    for episode in range(epoch):
        current_state = env.reset()
        count = 0
        while True:
            env.render()
            # increment the step counter
            count += 1
            # Sarsa learning: execute the action to get the reward and the new
            # state, pick the next action epsilon-greedily, look up its Q value,
            # then update the Q table
            if count == 1:
                action = RL.choose_action(str(current_state))
            new_state, reward, finished_or_not = env.step(action, count, RL)
            new_action = RL.choose_action(str(new_state))
            RL.learn(str(current_state), action, reward, str(new_state), new_action)
            action = new_action
            # Q-learning: execute the action to get the reward and the new state,
            # take the max Q value of the new state, then update the Q table
            # action = RL.choose_action(str(current_state))
            # new_state, reward, finished_or_not = env.step(action, count, RL)
            # RL.learn(str(current_state), action, reward, str(new_state))
            current_state = new_state
            if finished_or_not:
                if episode == epoch - 1:
                    env.display(RL.q_table)
                    print(env.get_outcome())
                    with open(RL.__class__.__name__ + "5.txt", "w") as f:
                        for i in env.get_outcome():
                            f.write(str(i) + "\n")
                    time.sleep(1)  # brief pause before tearing down the window
                break
    env.destroy()
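# A minimal sketch of the tabular Sarsa update that RL.learn above is assumed
# to perform (the RL class is not shown here; the names q_table/alpha/gamma
# are assumptions). Sarsa is on-policy: it bootstraps from Q(s', a') for the
# action a' that was actually chosen, not from max_a Q(s', a).
import pandas as pd

class SarsaSketch:
    def __init__(self, actions, alpha=0.1, gamma=0.9):
        self.actions = actions
        self.alpha, self.gamma = alpha, gamma
        self.q_table = pd.DataFrame(columns=actions, dtype=float)

    def learn(self, s, a, r, s_, a_):
        # Q(s,a) <- Q(s,a) + alpha * (r + gamma * Q(s',a') - Q(s,a))
        self._ensure(s)
        self._ensure(s_)
        target = r + self.gamma * self.q_table.loc[s_, a_]
        self.q_table.loc[s, a] += self.alpha * (target - self.q_table.loc[s, a])

    def _ensure(self, s):
        # lazily add unseen states as zero-initialised rows
        if s not in self.q_table.index:
            self.q_table.loc[s] = [0.0] * len(self.actions)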
def removeFromRL(self, ready_list: RL, to_be_deleted_id: int):
    ready_list.remove(j=to_be_deleted_id)
    return ready_list
mode = "CPU" runOnGPU = len(sys.argv)>1 #If a console parameter is received run in GPU. else run on CPU if(runOnGPU): mode = "GPU" learning_rate = 0.00001 epsilon = 0.05 discount = 0.95 _lambda = 0.80 trainEpisodes = 20 #Number of episodes to run before displaying learning stats display_frequency = 10 AI = RL(learning_rate, epsilon,discount, _lambda, display_frequency) CPUfile = Path("netCPU.pt") GPUfile = Path("netGPU.pt") if (runOnGPU and GPUfile.is_file()) or (not runOnGPU and CPUfile.is_file()): print("Loaded Network", mode) print("Learning...") if(runOnGPU): print("Implement for GPU") #AI.approximator = torch.load("netGPU.pt") #AI.QLearningGPU(trainEpisodes) #torch.save(AI.approximator, "netGPU.pt") else: AI.approximator = torch.load("netCPU.pt") AI.QLearning(trainEpisodes)
def __init__(self, actions, epsilon, alpha=0.2, gamma=1.0):
    RL.__init__(self, actions, epsilon, alpha, gamma)
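# A minimal sketch of the RL base class this subclass is assumed to extend
# (the constructor signature is inferred from the call above; the
# choose_action helper is an illustrative assumption, not the original
# implementation).
import random
from collections import defaultdict

class RLBaseSketch:
    def __init__(self, actions, epsilon, alpha=0.2, gamma=1.0):
        self.actions = actions       # available action labels
        self.epsilon = epsilon       # exploration probability
        self.alpha = alpha           # learning rate
        self.gamma = gamma           # discount factor
        self.q = defaultdict(float)  # Q[(state, action)]

    def choose_action(self, state):
        # epsilon-greedy: explore with probability epsilon, else exploit
        if random.random() < self.epsilon:
            return random.choice(self.actions)
        return max(self.actions, key=lambda a: self.q[(state, a)])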
class Manager:
    def __init__(self, outfile, mode=True):
        # A process descriptor array PCB[16]
        # A resource descriptor array RCB[4] with multiunit resources
        # RCB[0] and RCB[1] have 1 unit each; RCB[2] has 2 units; RCB[3] has 3 units
        # A 3-level RL
        self.PCBIndex = PCBIndex()
        self.RL = RL()
        self.RCBIndex = RCBIndex()
        self.n = SIZE
        self.mode = mode
        if self.mode:
            self.output = outfile
        # runningid (set in start) is the process at the head of RL
        # Erase all previous contents of the data structures PCB, RCB, RL
        self.start()

    """
    start(): The init function should always perform the following tasks:
    Create a single running process at PCB[0] with priority 0
    Enter the process into the RL at the lowest-priority level 0
    self.PCB._process_list[0] = Process(pid=0, prio=0)
    """

    def start(self):
        self.PCBIndex.start()
        self.runningid = 0
        self.RL.enqueue(pid=0, priority=0)
        if self.mode:
            self.output.write("\n")
        self.scheduler()

    """
    create(): a new process is created and given a unique id, a parent, and a
    priority; a context switch is run in case the new process has a higher
    priority
    """

    def create(self, priority: int):
        if 1 <= priority <= 2:
            if self.PCBIndex.getCurrent() >= self.n:
                self.displayError()
            else:
                pid = self.PCBIndex.create(priority, parent=self.runningid)
                parent_index = self.PCBIndex.find(self.runningid)
                self.PCBIndex.accessProcessChild(parent_index, pid)
                self.RL.enqueue(pid=pid, priority=priority)
                # print("process", pid, "created")
                self.scheduler()
        else:
            self.displayError()

    """
    destroy(): removes a process and its children from the program. This
    includes the deletion of its PCB, removal from the ReadyList, release of
    any held resources, and the cleansing of each resource's WaitList
    """

    def destroy(self, j):
        if j == 0:
            # EXCEPTION - attempt to delete process 0
            self.displayError()
        elif self.PCBIndex.checkIsValid(parent_id=self.runningid, pid=j):
            # find the set of children the process may have
            total, set_of_children = self.PCBIndex.remove(self.runningid, j)
            # for all k in j: destroy(k) - destroy the children
            for pid in set_of_children:
                tbd_index = self.PCBIndex.find(pid)
                a = self.PCBIndex.process_list[tbd_index].resources
                while a is not None:
                    self.releaseInDestroy(a.value[0], pid)
                    a = a.next
                if self.PCBIndex.exists(pid):
                    self.PCBIndex.removeFromParent(
                        self.PCBIndex.process_list[tbd_index].getParent(), pid)
                self.RL.remove(decision=False, j=pid)
                self.PCBIndex.process_list[tbd_index] = None
            # destroy j - remove j from its parent and the ready list
            tbd_index = self.PCBIndex.find(j)
            a = self.PCBIndex.process_list[tbd_index].resources
            if self.PCBIndex.exists(j):
                self.PCBIndex.removeFromParent(
                    self.PCBIndex.process_list[tbd_index].getParent(), j)
            while a is not None:
                self.releaseInDestroy(a.value[0], j)
                a = a.next
            self.RL.remove(decision=False, j=j)
            self.PCBIndex.process_list[tbd_index] = None
            # run through each RCB's waiting list to remove possibly waiting processes
            alist = list(set_of_children)
            alist.append(j)
            self.RCBIndex.cleanseWaitlist(alist)
            # fix PCBIndex in case there are any gaps
            self.PCBIndex.reshuffle()
            self.scheduler()
        else:
            # EXCEPTION - attempt to delete a process that is not there
            self.displayError()

    """
    request(): the running process attempts to request a number of units from
    a given resource. If the resource has the requested units available, they
    are allocated to the running process. If not, the running process is
    removed from the ReadyList, its state is set to Blocked, and it is placed
    on the requested resource's WaitList
    """

    def request(self, resource, numUnits):
        if resource < 0 or resource > 3:
            self.displayError()
            return
        refResource = self.RCBIndex.getResource(resource)
        if self.runningid == 0:
            # EXCEPTION - requesting a resource for process 0
            self.displayError()
        elif numUnits == 0:
            # requesting 0 units is not an exception, but it does nothing
            # this refers to piazza post: https://piazza.com/class/kffw9nufy3m5qf?cid=35
            self.scheduler()
            return
        elif refResource == -1:
            # EXCEPTION - nonexistent resource
            self.displayError()
        elif refResource.state >= numUnits:
            value = self.PCBIndex.accessProcessHasResource(
                process_index=self.PCBIndex.find(self.runningid), rid=resource)
            if value != -1:
                # already holds the resource and can append to it
                self.PCBIndex.appendProcessResource(
                    process_index=self.PCBIndex.find(self.runningid),
                    rid=resource, numUnits=numUnits)
                self.RCBIndex.setResource(rid=resource, k=numUnits, change=True)
                self.scheduler()
            else:
                # does not hold the resource yet; add it to its resource list
                self.RCBIndex.setResource(rid=resource, k=numUnits, change=True)
                parent_index = self.PCBIndex.find(self.runningid)
                self.PCBIndex.addProcessResource(process_index=parent_index,
                                                 rid=resource, numUnits=numUnits)
                self.scheduler()
                # print("received", numUnits, "units")
        else:
            value = self.PCBIndex.accessProcessHasResource(
                process_index=self.PCBIndex.find(self.runningid), rid=resource)
            if value != -1:
                # the process holds the resource, so add the pre-existing value
                # to the number of units requested
                if numUnits > refResource.inventory:
                    # EXCEPTION - requesting more than the resource initially has
                    self.displayError()
                elif (value + numUnits) > refResource.inventory:
                    # EXCEPTION - requesting more than the resource has
                    self.displayError()
                else:
                    parent_index = self.PCBIndex.find(self.runningid)
                    self.PCBIndex.accessProcessState(process_index=parent_index,
                                                     state=0)
                    self.RL.remove(decision=True, j=self.runningid)
                    self.RCBIndex.enqueue(rid=resource, i=self.runningid,
                                          k=numUnits)
                    self.scheduler()
            else:
                if numUnits > refResource.inventory:
                    # EXCEPTION - requesting more than the resource initially has
                    self.displayError()
                    return
                parent_index = self.PCBIndex.find(self.runningid)
                self.PCBIndex.accessProcessState(process_index=parent_index,
                                                 state=0)
                self.RL.remove(decision=True, j=self.runningid)
                self.RCBIndex.enqueue(rid=resource, i=self.runningid, k=numUnits)
                self.scheduler()

    """
    release(): releases the desired resources
    """

    def release(self, resource, number):
        # check if the resource is allocated to the running process
        if resource < 0 or resource > 3:
            self.displayError()
            return
        value = self.PCBIndex.accessProcessHasResource(
            process_index=self.PCBIndex.find(self.runningid), rid=resource)
        if value == -1 or value < number:
            # EXCEPTION - releasing a resource the process is not holding
            self.displayError()
        elif number == 0:
            # not an exception, but an attempt to release 0 units does nothing
            self.scheduler()
            return
        else:
            # remove r from the resource list of process i
            amount_to_add = self.PCBIndex.releaseResource(
                self.PCBIndex.find(self.runningid), resource, number)
            # add the amount released back into the resource
            self.RCBIndex.setResource(resource, amount_to_add, False)
            refResource = self.RCBIndex.getResource(resource)
            if refResource.waiting_list is None:
                pass
            else:
                # this looks at one spot in the waiting list at a time
                while self.RCBIndex.peek(resource):
                    pid, amount = self.RCBIndex.dequeue(resource)
                    process_index = self.PCBIndex.find(pid)
                    self.PCBIndex.accessProcessState(process_index, 1)
                    self.RCBIndex.setResource(resource, amount, True)
                    # check if the process already holds the resource
                    if self.PCBIndex.accessProcessHasResource(
                            process_index, resource) != -1:
                        self.PCBIndex.appendProcessResource(
                            process_index, resource, amount)
                    else:
                        self.PCBIndex.addProcessResource(
                            process_index, resource, amount)
                    self.RL.enqueue(
                        pid,
                        self.PCBIndex.process_list[process_index].getPriority())
            # print("resource", str(resource), "released")
            self.scheduler()

    """
    timeout(): performs a purposeful context switch
    """

    def timeout(self):
        # moves the running process to the end of its priority level,
        # then calls the scheduler
        self.RL.timeout()
        self.scheduler()

    """
    scheduler(): retrieves the highest-priority process; used for context switches
    """

    def scheduler(self):
        # if self.RL.findHead() != -1:
        #     print("process " + str(self.RL.getHead()) + " is running")
        self.RL.findHead()
        self.runningid = self.RL.getHead()
        if self.mode:
            self.output.write(str(self.runningid) + " ")
        else:
            print(str(self.RL.getHead()))

    """
    Helper functions: not explicitly required, but needed by the required ones
    """

    def displayError(self):
        if self.mode:
            self.output.write(str(-1) + " ")
        else:
            print(-1)

    def releaseInDestroy(self, resource, from_pid):
        value = self.PCBIndex.accessProcessHasResource(
            process_index=self.PCBIndex.find(from_pid), rid=resource)
        if value == -1:
            # EXCEPTION - releasing a resource the process is not holding
            self.displayError()
        else:
            # remove r from the resource list of process i
            amount_to_add = self.PCBIndex.releaseAllResource(
                self.PCBIndex.find(from_pid), resource)
            # add the amount released back into the resource
            self.RCBIndex.setResource(resource, amount_to_add, False)
            refResource = self.RCBIndex.getResource(resource)
            if refResource.waiting_list is None:
                pass
            else:
                pid, amount = self.RCBIndex.dequeue(resource)
                process_index = self.PCBIndex.find(pid)
                if process_index != -1 and process_index is not None:
                    self.PCBIndex.accessProcessState(process_index, 1)
                    self.RCBIndex.setResource(resource, amount, True)
                    # check if the process already holds the resource
                    if self.PCBIndex.accessProcessHasResource(
                            process_index, resource) != -1:
                        self.PCBIndex.appendProcessResource(
                            process_index, resource, amount)
                    else:
                        self.PCBIndex.addProcessResource(
                            process_index, resource, amount)
                    self.RL.enqueue(
                        pid,
                        self.PCBIndex.process_list[process_index].getPriority())
            # print("resource", str(resource), "released")
            # self.scheduler()

    """
    DEBUG functions
    """

    def printRL(self):
        print("printRL")
        for i in range(0, 3):
            print("\nin " + str(i), end=" ")
            node = self.RL.readylist[i]
            while node is not None:
                print(str(node.value) + "->", end='')
                node = node.next
        print()

    def printPCB(self):
        print("print PCB")
        for i in range(self.PCBIndex.getCurrent()):
            print("Id: " + str(self.PCBIndex.process_list[i]))

    def printRCB(self):
        print("print RCB")
        for i in range(0, 4):
            print("Id: " + str(self.RCBIndex.resource_list[i]))
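# A minimal sketch of the 3-level ready list the Manager above depends on
# (the enqueue/remove/timeout/findHead/getHead signatures are inferred from
# the call sites; the deque-based implementation is an assumption, not the
# original RL class).
from collections import deque

class ReadyListSketch:
    LEVELS = 3  # priorities 0 (lowest) .. 2 (highest)

    def __init__(self):
        self.readylist = [deque() for _ in range(self.LEVELS)]
        self.head = -1

    def enqueue(self, pid, priority):
        self.readylist[priority].append(pid)

    def remove(self, decision, j):
        # decision distinguishes blocking from destroying in the original;
        # for this sketch both just delete pid j from whichever level holds it
        for level in self.readylist:
            if j in level:
                level.remove(j)
                return

    def timeout(self):
        # rotate the running (head) process to the back of its priority level
        for level in reversed(self.readylist):
            if level:
                level.rotate(-1)
                return

    def findHead(self):
        # the highest non-empty priority level wins
        for prio in range(self.LEVELS - 1, -1, -1):
            if self.readylist[prio]:
                self.head = self.readylist[prio][0]
                return self.head
        self.head = -1
        return -1

    def getHead(self):
        return self.head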
# Learning parameters
batch_size = 64
learning_rate = 0.000001
initial_epsilon = 0.25
# With one decay per episode, the cumulative decay factor is about 0.97 at
# episode 10,000, 0.74 at episode 100,000, and 0.05 at episode 1,000,000
epsilon_decay = 0.999997
discount = 0.95
trainEpisodes = 40000
experience_stored = 1000000
step_delta = 1000
# Number of episodes to run before displaying learning stats
display_frequency = 10

AI = RL(batch_size, learning_rate, initial_epsilon, epsilon_decay, discount,
        experience_stored, step_delta, display_frequency, runOnGPU)
CPUfile = Path("netCPU.pt")
GPUfile = Path("netGPU.pt")
# Load experience information from previous sessions
AI.approximator.loadExperience("experience.pkl")
if (runOnGPU and GPUfile.is_file()) or (not runOnGPU and CPUfile.is_file()):
    print("Loaded Network", mode)
print("Learning...")
if runOnGPU:
    AI.approximator = torch.load("netGPU.pt")
    AI.QLearningGPU(trainEpisodes)
    torch.save(AI.approximator, "netGPU.pt")
else:
    AI.approximator = torch.load("netCPU.pt")
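# A quick check of the decay schedule quoted in the comment above: applying
# epsilon_decay once per episode, the cumulative factor after n episodes is
# epsilon_decay ** n (a sketch; the constants match the script above).
for n in (10_000, 100_000, 1_000_000):
    factor = 0.999997 ** n
    print(f"after {n:>9,} episodes: decay factor {factor:.2f}, "
          f"epsilon {0.25 * factor:.4f}")
# after    10,000 episodes: decay factor 0.97, epsilon 0.2426
# after   100,000 episodes: decay factor 0.74, epsilon 0.1852
# after 1,000,000 episodes: decay factor 0.05, epsilon 0.0124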
class RLAgent(object):
    FRAME_AHEAD = 14
    DEBUG_MODE = True
    epsilon = 0.01
    gamma = 0.95
    alpha = 0.2
    lamb = 0.1
    action_weights_number = 5
    use_exp_replay = False
    weights_path = "pickled_weights.pkl"

    def __init__(self, gateway):
        self.gateway = gateway
        self.my_actions = []
        self.op_actions = []
        self.ACTION = self.gateway.jvm.enumerate.Action

    def close(self):
        pass

    def getInformation(self, frame_data):
        # Load the frame data every time getInformation gets called
        self.state.frame_data = frame_data
        self.frame_data = frame_data
        self.cc.setFrameData(self.frame_data, self.player_num)
        self.state.my_char = self.frame_data.getCharacter(self.player_num)
        self.state.op_char = self.frame_data.getCharacter(not self.player_num)
        self.my_char = self.frame_data.getCharacter(self.player_num)
        self.op_char = self.frame_data.getCharacter(not self.player_num)

    # please define this method when you use FightingICE version 3.20 or later
    def roundEnd(self, x, y, z):
        print("round ending")
        print(self.weights_path)
        # persist the learned weights at the end of each round
        with open(self.weights_path, 'wb') as f:
            print("the file is open")
            print(self.agent.actions_weights)
            print("dumping")
            pickle.dump(self.agent.actions_weights, f)
        print(x)
        print(y)
        print(z)

    # please define this method when you use FightingICE version 4.00 or later
    def getScreenData(self, sd):
        pass

    def initialize(self, game_data, player_num):
        # Initializing the command center, the simulator and some other things
        self.input_key = self.gateway.jvm.struct.Key()
        self.frame_data = self.gateway.jvm.struct.FrameData()
        self.cc = self.gateway.jvm.aiinterface.CommandCenter()
        self.simulate_time = 60
        self.player_num = player_num
        self.game_data = game_data
        self.simulator = self.game_data.getSimulator()
        self.state = State(self.gateway, game_data, self.cc, player_num)
        self.is_game_just_started = True
        self.current_action = ActionValue(0, 0, 0)
        self.weight_path_p1 = None
        self.weight_path_p2 = None
        self.in_behaviour = False
        self.my_last_hp = 0
        self.op_last_hp = 0
        # create the learning agent
        self.agent = RL(self.gateway, self.state, self.epsilon, self.gamma,
                        self.alpha, self.lamb, self.state.features_num,
                        self.state.player_num, self.use_exp_replay)
        self.action_air = []
        self.action_ground = []
        self.sp_skill = self.ACTION.STAND_D_DF_FC
        self.my_motion_data = self.game_data.getMotionData(self.player_num)
        self.op_motion_data = self.game_data.getMotionData(not self.player_num)
        self.my_actions = []
        self.op_actions = []
        self.agent.epsilon = self.epsilon
        print(self.weights_path)
        print(self.agent.actions_weights)
        self.SetWeights()
        return 0

    def SetWeights(self):
        # Try to restore pickled weights; fall back to zero-initialised
        # weight vectors, one per action-weight slot
        try:
            with open(self.weights_path, 'rb') as f:
                multi_feat = pickle.load(f)
            self.agent.SetMultipleWeights(multi_feat)
        except Exception:
            multi_feat = []
            for i in range(self.action_weights_number):
                feat = [0.0] * self.state.features_num
                multi_feat.append(feat)
            self.agent.SetMultipleWeights(multi_feat)

    def input(self):
        # The input is set up in the global variable input_key,
        # which is modified in the processing part
        return self.input_key

    def processing(self):
        # First we check whether we are at the end of the round
        if self.frame_data.getEmptyFlag() or \
                self.frame_data.getRemainingFramesNumber() <= 0:
            self.is_game_just_started = True
            return
        if not self.is_game_just_started:
            # Simulate the delay and look ahead 2 frames. The simulator class
            # already exists in FightingICE
            self.frame_data = self.simulator.simulate(self.frame_data,
                                                      self.player_num, None,
                                                      None, self.FRAME_AHEAD)
        else:
            # If the game just started, there is no point in simulating
            self.is_game_just_started = False
        self.cc.setFrameData(self.frame_data, self.player_num)
        self.state.Update(self.cc, self.frame_data, self.player_num)
        if self.cc.getSkillFlag():
            # If there is a previous "command" still in execution, keep doing it
            self.input_key = self.cc.getSkillKey()
            return
        # We empty the keys and cancel the skill just in case
        self.input_key.empty()
        self.cc.skillCancel()
        self.state.SetActions(self.frame_data, self.player_num)
        # Reward is the opponent's HP loss minus our own HP loss since last step
        reward = abs(self.op_last_hp - self.state.op_char.getHp()) - abs(
            self.my_last_hp - self.state.my_char.getHp())
        self.my_last_hp = self.state.my_char.getHp()
        self.op_last_hp = self.state.op_char.getHp()
        next_action = self.agent.Update(self.frame_data, reward,
                                        self.current_action.action_weight)
        self.current_action = next_action
        chosen_action = self.state.my_actions[self.current_action.action_index]
        self.ExecuteOption(chosen_action)

    def ExecuteOption(self, action):
        if type(action) is str:
            print("The option picked: ", action)
            action_name = action
        else:
            action_name = action.name()
        selected_action = self.ACTION.NEUTRAL
        if "OPTION" in action_name:
            if "GUARD" in action_name:
                self.action_air = [self.ACTION.AIR_GUARD]
                self.action_ground = [
                    self.ACTION.DASH, self.ACTION.NEUTRAL, self.ACTION.STAND_A,
                    self.ACTION.CROUCH_B, self.ACTION.THROW_A,
                    self.ACTION.STAND_B, self.ACTION.CROUCH_A
                ]
                self.op_action_air = [
                    self.ACTION.AIR_B, self.ACTION.AIR_DB, self.ACTION.AIR_FB
                ]
                self.op_action_ground = [
                    self.ACTION.STAND, self.ACTION.DASH, self.ACTION.STAND_A,
                    self.ACTION.CROUCH_B, self.ACTION.STAND_B
                ]
                self.simulate_time = 60
            elif "KICK" in action_name:
                self.action_air = [self.ACTION.AIR_GUARD]
                self.action_ground = [
                    self.ACTION.STAND, self.ACTION.DASH,
                    self.ACTION.FORWARD_WALK, self.ACTION.CROUCH_A,
                    self.ACTION.CROUCH_B, self.ACTION.CROUCH_FB,
                    self.ACTION.STAND_D_DB_BB
                ]
                self.op_action_air = [
                    self.ACTION.AIR_B, self.ACTION.AIR_DB, self.ACTION.AIR_FB
                ]
                self.op_action_ground = [
                    self.ACTION.STAND, self.ACTION.DASH, self.ACTION.CROUCH_FB
                ]
                self.simulate_time = 60
            elif "GRAB" in action_name:
                self.action_air = [self.ACTION.AIR]
                self.action_ground = [
                    self.ACTION.FORWARD_WALK, self.ACTION.DASH,
                    self.ACTION.STAND_A, self.ACTION.THROW_A
                ]
                self.op_action_air = [self.ACTION.AIR]
                self.op_action_ground = [
                    self.ACTION.STAND, self.ACTION.DASH, self.ACTION.STAND_A
                ]
                self.simulate_time = 20
            elif "ANTI-AIR" in action_name:
                self.action_air = [self.ACTION.AIR_GUARD]
                self.action_ground = [
                    self.ACTION.FORWARD_WALK, self.ACTION.CROUCH_FA,
                    self.ACTION.STAND_FB
                ]
                self.op_action_air = [self.ACTION.NEUTRAL]
                self.op_action_ground = [self.ACTION.NEUTRAL]
                self.simulate_time = 20
            elif "ALL_ACTIONS" in action_name:
                self.action_air = [
                    self.ACTION.AIR_GUARD, self.ACTION.AIR_A,
                    self.ACTION.AIR_B, self.ACTION.AIR_DA, self.ACTION.AIR_DB,
                    self.ACTION.AIR_FA, self.ACTION.AIR_FB, self.ACTION.AIR_UA,
                    self.ACTION.AIR_UB, self.ACTION.AIR_D_DF_FA,
                    self.ACTION.AIR_D_DF_FB, self.ACTION.AIR_F_D_DFA,
                    self.ACTION.AIR_F_D_DFB, self.ACTION.AIR_D_DB_BA,
                    self.ACTION.AIR_D_DB_BB
                ]
                self.action_ground = [
                    self.ACTION.STAND_D_DB_BA, self.ACTION.BACK_STEP,
                    self.ACTION.FORWARD_WALK, self.ACTION.DASH,
                    self.ACTION.JUMP, self.ACTION.FOR_JUMP,
                    self.ACTION.BACK_JUMP, self.ACTION.STAND_GUARD,
                    self.ACTION.CROUCH_GUARD, self.ACTION.THROW_A,
                    self.ACTION.THROW_B, self.ACTION.STAND_A,
                    self.ACTION.STAND_B, self.ACTION.CROUCH_A,
                    self.ACTION.CROUCH_B, self.ACTION.STAND_FA,
                    self.ACTION.STAND_FB, self.ACTION.CROUCH_FA,
                    self.ACTION.CROUCH_FB, self.ACTION.STAND_D_DF_FA,
                    self.ACTION.STAND_D_DF_FB, self.ACTION.STAND_F_D_DFA,
                    self.ACTION.STAND_F_D_DFB, self.ACTION.STAND_D_DB_BB
                ]
                self.op_action_air = self.action_air
                self.op_action_ground = self.action_ground
                self.simulate_time = 60
        self.MCTSPrepare()
        root_node = TreeNode(self.gateway, self.simulator_ahead_frame_data,
                             None, self.my_actions, self.op_actions,
                             self.game_data, self.player_num, self.cc)
        best_action = root_node.MCTS()
        print("executing: ", best_action.name())
        self.cc.commandCall(best_action.name())

    def MCTSPrepare(self):
        self.simulator_ahead_frame_data = self.simulator.simulate(
            self.frame_data, self.player_num, None, None, self.FRAME_AHEAD)
        self.my_char = self.simulator_ahead_frame_data.getCharacter(
            self.player_num)
        self.op_char = self.simulator_ahead_frame_data.getCharacter(
            not self.player_num)
        self.SetMyAction()
        self.SetOpAction()

    def SetMyAction(self):
        # Collect every action whose start-up energy cost we can currently afford
        self.my_actions = []
        energy = self.my_char.getEnergy()
        if str(self.my_char.getState()) == "AIR":
            for i in range(len(self.action_air)):
                if abs(self.my_motion_data[
                        self.gateway.jvm.enumerate.Action.valueOf(
                            self.action_air[i].name()).ordinal()].
                       getAttackStartAddEnergy()) <= energy:
                    self.my_actions.append(self.action_air[i])
        else:
            move_index = self.gateway.jvm.enumerate.Action.valueOf(
                self.sp_skill.name()).ordinal()
            if abs(self.my_motion_data[move_index].getAttackStartAddEnergy()
                   ) <= energy:
                self.my_actions.append(self.sp_skill)
            for i in range(len(self.action_ground)):
                if abs(self.my_motion_data[
                        self.gateway.jvm.enumerate.Action.valueOf(
                            self.action_ground[i].name()).ordinal()].
                       getAttackStartAddEnergy()) <= energy:
                    self.my_actions.append(self.action_ground[i])

    def SetOpAction(self):
        self.op_actions = []
        energy = self.op_char.getEnergy()
        if str(self.op_char.getState()) == "AIR":
            for i in range(len(self.action_air)):
                if abs(self.op_motion_data[
                        self.gateway.jvm.enumerate.Action.valueOf(
                            self.action_air[i].name()).ordinal()].
                       getAttackStartAddEnergy()) <= energy:
                    self.op_actions.append(self.action_air[i])
        else:
            if abs(self.op_motion_data[self.gateway.jvm.enumerate.Action.
                                       valueOf(self.sp_skill.name()).ordinal()].
                   getAttackStartAddEnergy()) <= energy:
                self.op_actions.append(self.sp_skill)
            for i in range(len(self.action_ground)):
                if abs(self.op_motion_data[
                        self.gateway.jvm.enumerate.Action.valueOf(
                            self.action_ground[i].name()).ordinal()].
                       getAttackStartAddEnergy()) <= energy:
                    self.op_actions.append(self.action_ground[i])

    class Java:
        implements = ["aiinterface.AIInterface"]
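# A minimal round-trip of the weight persistence used by roundEnd/SetWeights
# above: pickle writes bytes, so the file must be opened in binary mode
# ('wb'/'rb') on both ends. The weights here are dummies, not the agent's
# real data.
import pickle

weights = [[0.0] * 4 for _ in range(5)]       # action_weights_number x features_num
with open("pickled_weights.pkl", "wb") as f:  # 'wb', not 'w'
    pickle.dump(weights, f)
with open("pickled_weights.pkl", "rb") as f:  # 'rb', not 'r'
    restored = pickle.load(f)
assert restored == weights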
def main(args):
    os.environ['KMP_WARNINGS'] = '0'
    torch.cuda.manual_seed_all(1)
    np.random.seed(0)
    # filter sizes per level
    num_features = [
        args.features * i
        for i in range(1, args.levels + 2 + args.levels_without_sample)
    ]
    # determine the output size
    target_outputs = int(args.output_size * args.sr)
    # model settings are only saved when training
    # set the hyperparameters for the teacher, student and student_for_backward
    student_KD = Waveunet(args.channels, num_features, args.channels,
                          levels=args.levels,
                          encoder_kernel_size=args.encoder_kernel_size,
                          decoder_kernel_size=args.decoder_kernel_size,
                          target_output_size=target_outputs,
                          depth=args.depth, strides=args.strides,
                          conv_type=args.conv_type, res=args.res)
    KD_optimizer = Adam(params=student_KD.parameters(), lr=args.lr)
    print(25 * '=' + 'model setting' + 25 * '=')
    print('student_KD: ', student_KD.shapes)

    if args.cuda:
        student_KD = utils.DataParallel(student_KD)
        print("move student_KD to gpu\n")
        student_KD.cuda()

    state = {"step": 0, "worse_epochs": 0, "epochs": 0, "best_pesq": -np.Inf}
    if args.load_model is not None:
        print("Continuing full model from checkpoint " + str(args.load_model))
        state = utils.load_model(student_KD, KD_optimizer, args.load_model,
                                 args.cuda)

    dataset = get_folds(args.dataset_dir, args.outside_test)
    log_dir, checkpoint_dir, result_dir = utils.mkdir_and_get_path(args)
    if args.test is False:
        writer = SummaryWriter(log_dir)
        # save and print the hyperparameter info
        with open(os.path.join(log_dir, 'config.json'), 'w') as f:
            json.dump(args.__dict__, f, indent=5)
        print('saving commandline_args')
        if args.teacher_model is not None:
            print(25 * '=' + 'printing hyperparameter info' + 25 * '=')
            print(f'KD_method = {args.KD_method}')
            teacher_num_features = [
                24 * i
                for i in range(1, args.levels + 2 + args.levels_without_sample)
            ]
            teacher_model = Waveunet(
                args.channels, teacher_num_features, args.channels,
                levels=args.levels,
                encoder_kernel_size=args.encoder_kernel_size,
                decoder_kernel_size=args.decoder_kernel_size,
                target_output_size=target_outputs, depth=args.depth,
                strides=args.strides, conv_type=args.conv_type, res=args.res)
            student_copy = Waveunet(
                args.channels, num_features, args.channels,
                levels=args.levels,
                encoder_kernel_size=args.encoder_kernel_size,
                decoder_kernel_size=args.decoder_kernel_size,
                target_output_size=target_outputs, depth=args.depth,
                strides=args.strides, conv_type=args.conv_type, res=args.res)
            copy_optimizer = Adam(params=student_copy.parameters(), lr=args.lr)
            student_copy2 = Waveunet(
                args.channels, num_features, args.channels,
                levels=args.levels,
                encoder_kernel_size=args.encoder_kernel_size,
                decoder_kernel_size=args.decoder_kernel_size,
                target_output_size=target_outputs, depth=args.depth,
                strides=args.strides, conv_type=args.conv_type, res=args.res)
            copy2_optimizer = Adam(params=student_copy2.parameters(),
                                   lr=args.lr)
            policy_network = RL(n_inputs=2, kernel_size=6, stride=1,
                                conv_type=args.conv_type, pool_size=4)
            PG_optimizer = Adam(params=policy_network.parameters(),
                                lr=args.RL_lr)
            if args.cuda:
                teacher_model = utils.DataParallel(teacher_model)
                policy_network = utils.DataParallel(policy_network)
                student_copy = utils.DataParallel(student_copy)
                student_copy2 = utils.DataParallel(student_copy2)
                teacher_model.cuda()
                student_copy.cuda()
                student_copy2.cuda()
                policy_network.cuda()

            student_size = sum(p.numel() for p in student_KD.parameters())
            teacher_size = sum(p.numel() for p in teacher_model.parameters())
            print('student_parameter count: ', str(student_size))
            print('teacher_model_parameter count: ', str(teacher_size))
            print('RL_parameter count: ',
                  str(sum(p.numel() for p in policy_network.parameters())))
            print(f'compression ratio: {100 * (student_size / teacher_size)}%')

            if args.teacher_model is not None:
                print("load teacher model" + str(args.teacher_model))
                _ = utils.load_model(teacher_model, None, args.teacher_model,
                                     args.cuda)
                teacher_model.eval()
            if args.load_RL_model is not None:
                print("Continuing full RL_model from checkpoint " +
                      str(args.load_RL_model))
                _ = utils.load_model(policy_network, PG_optimizer,
                                     args.load_RL_model, args.cuda)

        # If there is no data augmentation, at least crop targets to fit the
        # model output shape
        crop_func = partial(crop, shapes=student_KD.shapes)
        ### DATASET
        train_data = SeparationDataset(dataset, "train", args.sr,
                                       args.channels, student_KD.shapes,
                                       False, args.hdf_dir,
                                       audio_transform=crop_func)
        val_data = SeparationDataset(dataset, "test", args.sr, args.channels,
                                     student_KD.shapes, False, args.hdf_dir,
                                     audio_transform=crop_func)
        dataloader = torch.utils.data.DataLoader(
            train_data, batch_size=args.batch_size, shuffle=False,
            num_workers=args.num_workers,
            worker_init_fn=utils.worker_init_fn, pin_memory=True)

        # Set up the loss function
        if args.loss == "L1":
            criterion = nn.L1Loss()
        elif args.loss == "L2":
            criterion = nn.MSELoss()
        else:
            raise NotImplementedError("Couldn't find this loss!")
        My_criterion = customLoss()

        ### TRAINING START
        print('TRAINING START')
        if state["epochs"] > 0:
            state["epochs"] = state["epochs"] + 1
        batch_num = (len(train_data) // args.batch_size)
        if args.teacher_model is not None:
            counting = 0
            PG_lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(
                optimizer=PG_optimizer, gamma=args.decayRate)
            # fast-forward the RL learning-rate schedule to the resumed epoch
            while counting < state["epochs"]:
                PG_optimizer.zero_grad()
                PG_optimizer.step()
                counting += 1
                PG_lr_scheduler.step()
                # print(f'modify RL lr: {counting}, until: {state["epochs"]}')

        while state["epochs"] < 100:
            memory_alpha = []
            print("epoch:" + str(state["epochs"]))
            # monitored values
            total_avg_reward = 0
            total_avg_scalar_reward = 0
            avg_origin_loss = 0
            all_avg_KD_rate = 0
            same = 0
            with tqdm(total=len(dataloader)) as pbar:
                for example_num, (x, targets) in enumerate(dataloader):
                    student_KD.train()
                    if args.cuda:
                        x = x.cuda()
                        targets = targets.cuda()
                    if args.teacher_model is not None:
                        student_copy.train()
                        student_copy2.train()
                        # Snapshot the student weights/optimizer so the two
                        # comparison students start from the identical state,
                        # then set the cyclic LR for this iteration
                        temp = {'state_dict': None, 'optim_dict': None}
                        temp['state_dict'] = copy.deepcopy(
                            student_KD.state_dict())
                        temp['optim_dict'] = copy.deepcopy(
                            KD_optimizer.state_dict())
                        student_KD.load_state_dict(temp['state_dict'])
                        KD_optimizer.load_state_dict(temp['optim_dict'])
                        student_copy.load_state_dict(temp['state_dict'])
                        copy_optimizer.load_state_dict(temp['optim_dict'])
                        student_copy2.load_state_dict(temp['state_dict'])
                        copy2_optimizer.load_state_dict(temp['optim_dict'])
                        utils.set_cyclic_lr(KD_optimizer, example_num,
                                            len(train_data) // args.batch_size,
                                            args.cycles, args.min_lr, args.lr)
                        utils.set_cyclic_lr(copy_optimizer, example_num,
                                            len(train_data) // args.batch_size,
                                            args.cycles, args.min_lr, args.lr)
                        utils.set_cyclic_lr(copy2_optimizer, example_num,
                                            len(train_data) // args.batch_size,
                                            args.cycles, args.min_lr, args.lr)
                        # forward the student and teacher to get their outputs
                        student_KD_output, avg_student_KD_loss = utils.compute_loss(
                            student_KD, x, targets, criterion,
                            compute_grad=False)
                        teacher_output, _ = utils.compute_loss(
                            teacher_model, x, targets, criterion,
                            compute_grad=False)
                        # PG_state
                        diff_from_target = targets.detach() - \
                            student_KD_output.detach()
                        diff_from_teacher = teacher_output.detach() - \
                            student_KD_output.detach()
                        PG_state = torch.cat(
                            (diff_from_target, diff_from_teacher), 1)
                        # forward the RL network to get alpha
                        alpha = policy_network(PG_state)
                        nograd_alpha = alpha.detach()
                        avg_KD_rate = torch.mean(nograd_alpha).item()
                        all_avg_KD_rate += avg_KD_rate / batch_num

                        KD_optimizer.zero_grad()
                        KD_outputs, KD_hard_loss, KD_loss, KD_soft_loss = \
                            utils.KD_compute_loss(
                                student_KD, teacher_model, x, targets,
                                My_criterion, alpha=nograd_alpha,
                                compute_grad=True, KD_method=args.KD_method)
                        KD_optimizer.step()

                        copy_optimizer.zero_grad()
                        _, _, _, _ = utils.KD_compute_loss(
                            student_copy, teacher_model, x, targets,
                            My_criterion, alpha=1, compute_grad=True,
                            KD_method=args.KD_method)
                        copy_optimizer.step()

                        copy2_optimizer.zero_grad()
                        _, _, _, _ = utils.KD_compute_loss(
                            student_copy2, teacher_model, x, targets,
                            My_criterion, alpha=0, compute_grad=True,
                            KD_method=args.KD_method)
                        copy2_optimizer.step()

                        # calculate each updated model's MSE
                        backward_KD_loss = utils.loss_for_sample(
                            student_KD, x, targets)
                        backward_copy_loss = utils.loss_for_sample(
                            student_copy, x, targets)
                        backward_copy2_loss = utils.loss_for_sample(
                            student_copy2, x, targets)

                        # calculate rewards
                        rewards, same_num, before_decay = utils.get_rewards(
                            backward_KD_loss.detach(),
                            backward_copy_loss.detach(),
                            backward_copy2_loss.detach(),
                            backward_KD_loss.detach(), len(train_data),
                            state["epochs"] + 1)
                        same += same_num
                        rewards = rewards.detach()
                        avg_origin_loss += avg_student_KD_loss / batch_num

                        # average rewards
                        avg_reward = torch.mean(rewards)
                        avg_scalar_reward = torch.mean(torch.abs(rewards))
                        total_avg_reward += avg_reward.item() / batch_num
                        total_avg_scalar_reward += \
                            avg_scalar_reward.item() / batch_num

                        # append to memory_alpha
                        nograd_alpha = nograd_alpha.detach().cpu()
                        memory_alpha.append(nograd_alpha.numpy())

                        PG_optimizer.zero_grad()
                        _ = utils.RL_compute_loss(alpha, rewards, nn.MSELoss())
                        PG_optimizer.step()

                        # add to tensorboard
                        writer.add_scalar("student_KD_loss",
                                          avg_student_KD_loss, state["step"])
                        writer.add_scalar(
                            "backward_student_KD_loss",
                            np.mean(backward_KD_loss.detach().cpu().numpy()),
                            state["step"])
                        writer.add_scalar("KD_loss", KD_loss, state["step"])
                        writer.add_scalar("KD_hard_loss", KD_hard_loss,
                                          state["step"])
                        writer.add_scalar("KD_soft_loss", KD_soft_loss,
                                          state["step"])
                        writer.add_scalar("avg_KD_rate", avg_KD_rate,
                                          state["step"])
                        writer.add_scalar("rewards", avg_reward,
                                          state["step"])
                        writer.add_scalar("scalar_rewards", avg_scalar_reward,
                                          state["step"])
                        writer.add_scalar("before_decay", before_decay,
                                          state["step"])
                    else:
                        # no-KD training
                        utils.set_cyclic_lr(KD_optimizer, example_num,
                                            len(train_data) // args.batch_size,
                                            args.cycles, args.min_lr, args.lr)
                        KD_optimizer.zero_grad()
                        KD_outputs, KD_hard_loss = utils.compute_loss(
                            student_KD, x, targets, nn.MSELoss(),
                            compute_grad=True)
                        KD_optimizer.step()
                        avg_origin_loss += KD_hard_loss / batch_num
                        writer.add_scalar("student_KD_loss", KD_hard_loss,
                                          state["step"])

                    ### save wav ###
                    if example_num % args.example_freq == 0:
                        input_centre = torch.mean(
                            x[0, :, student_KD.shapes["output_start_frame"]:
                              student_KD.shapes["output_end_frame"]], 0)
                        # Stereo not supported for logs yet
                        writer.add_audio("input:", input_centre,
                                         state["step"], sample_rate=args.sr)
                        writer.add_audio("pred:",
                                         torch.mean(KD_outputs[0], 0),
                                         state["step"], sample_rate=args.sr)
                        writer.add_audio("target",
                                         torch.mean(targets[0], 0),
                                         state["step"], sample_rate=args.sr)
                    state["step"] += 1
                    pbar.update(1)

            # VALIDATE
            val_loss, val_metrics = validate(args, student_KD, criterion,
                                             val_data)
            print("ori VALIDATION FINISHED: LOSS: " + str(val_loss))
            choose_val = val_metrics
            if args.teacher_model is not None:
                for i in range(len(nograd_alpha)):
                    writer.add_scalar("KD_rate_" + str(i), nograd_alpha[i],
                                      state["epochs"])
                print(f'all_avg_KD_rate = {all_avg_KD_rate}')
                writer.add_scalar("all_avg_KD_rate", all_avg_KD_rate,
                                  state["epochs"])
                writer.add_scalar("total_avg_reward", total_avg_reward,
                                  state["epochs"])
                writer.add_scalar("total_avg_scalar_reward",
                                  total_avg_scalar_reward, state["epochs"])
                RL_checkpoint_path = os.path.join(
                    checkpoint_dir, "RL_checkpoint_" + str(state["epochs"]))
                utils.save_model(policy_network, PG_optimizer, state,
                                 RL_checkpoint_path)
                PG_lr_scheduler.step()

            writer.add_scalar("same", same, state["epochs"])
            writer.add_scalar("avg_origin_loss", avg_origin_loss,
                              state["epochs"])
            writer.add_scalar("val_enhance_pesq", choose_val[0],
                              state["epochs"])
            writer.add_scalar("val_improve_pesq", choose_val[1],
                              state["epochs"])
            writer.add_scalar("val_enhance_stoi", choose_val[2],
                              state["epochs"])
            writer.add_scalar("val_improve_stoi", choose_val[3],
                              state["epochs"])
            writer.add_scalar("val_enhance_SISDR", choose_val[4],
                              state["epochs"])
            writer.add_scalar("val_improve_SISDR", choose_val[5],
                              state["epochs"])
            writer.add_scalar("val_loss", val_loss, state["epochs"])

            # Set up the training state dict that will also be saved into checkpoints
            checkpoint_path = os.path.join(
                checkpoint_dir, "checkpoint_" + str(state["epochs"]))
            if choose_val[0] < state["best_pesq"]:
                state["worse_epochs"] += 1
            else:
                print("MODEL IMPROVED ON VALIDATION SET!")
                state["worse_epochs"] = 0
                state["best_pesq"] = choose_val[0]
                state["best_checkpoint"] = checkpoint_path

            # CHECKPOINT
            print("Saving model...")
            utils.save_model(student_KD, KD_optimizer, state, checkpoint_path)
            print('dump alpha_memory')
            with open(os.path.join(log_dir, 'alpha_' + str(state["epochs"])),
                      "wb") as fp:  # pickling
                pickle.dump(memory_alpha, fp)
            state["epochs"] += 1
        writer.close()
        info = args.model_name
        path = os.path.join(result_dir, info)
    else:
        PATH = args.load_model.split("/")
        info = PATH[-3] + "_" + PATH[-1]
        if args.outside_test:
            info += "_outside_test"
        print(info)
        path = os.path.join(result_dir, info)
        # test_data = SeparationDataset(dataset, "test", args.sr, args.channels,
        #                               student_KD.shapes, False, args.hdf_dir,
        #                               audio_transform=crop_func)

    #### TESTING ####
    print("TESTING")
    # evaluation metrics
    # ling_data = get_ling_data_list('/media/hd03/sutsaiwei_data/data/mydata/ling_data')
    # validate(args, student_KD, criterion, test_data)
    # test_metrics = ling_evaluate(args, ling_data['noisy'], student_KD)
    # test_metrics = evaluate_without_noisy(args, dataset["test"], student_KD)
    test_metrics = evaluate(args, dataset["test"], student_KD)
    test_pesq = test_metrics['pesq']
    test_stoi = test_metrics['stoi']
    test_SISDR = test_metrics['SISDR']
    test_noise = test_metrics['noise']
    if not os.path.exists(path):
        os.makedirs(path)
    utils.save_result(test_pesq, path, "pesq")
    utils.save_result(test_stoi, path, "stoi")
    utils.save_result(test_SISDR, path, "SISDR")
    utils.save_result(test_noise, path, "noise")
import numpy as np
import os
import torch
import _pickle as cPickle
from RL import RL
import time

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
localtime = time.asctime(time.localtime(time.time()))
os.system('clear')
data = []

rl = RL(Network='NN_2',
        system_size=3,
        p_error=0.1,
        capacity=200,
        dropout=0.0,
        learning_rate=0.00025,
        discount_factor=0.95)

rl.train_for_n_epochs(training_steps=100,
                      evaluation_steps=100,
                      prediction_steps=10,
                      epochs=10,
                      clip_error_term=5,
                      target_update=10,
                      reward_definition=0,
                      optimizer='Adam',
                      save_model_each_epoch=True,
                      data=data,
                      localtime=localtime)
from RL import RL
from State import State
from Policy import Policy
from pathlib import Path
import numpy

print("___________")
print("TIC TAC TOE")
print("___________\n")
print("Learning...")

trainEpisodes = 100000
AI = RL(0.05)
Qfile = Path("Qvals.npy")
if Qfile.is_file():
    print("Loaded Q File")
    AI.policy.Q = numpy.load("Qvals.npy")
    AI.QLearning(0.95, 0.9, 0.1, trainEpisodes)
else:
    print("Starting New Training")
    AI.QLearning(0.95, 0.9, 0.1, trainEpisodes)
numpy.save("Qvals.npy", AI.policy.Q)

'''
Game
'''
while True:
    val = input("\nEnter 1 to go first, enter otherwise to go second: ")
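# A minimal sketch of the tabular Q-learning loop that AI.QLearning above is
# assumed to run. Reading the call as QLearning(gamma, alpha, epsilon,
# episodes) is an assumption from typical argument ordering; the env object
# and its methods are hypothetical, and Q here is a dict rather than the
# numpy array stored in Qvals.npy.
import random

def q_learning_sketch(env, gamma, alpha, epsilon, episodes):
    Q = {}  # Q[(state, action)] -> value
    for _ in range(episodes):
        state, done = env.reset(), False
        while not done:
            actions = env.legal_actions(state)
            if random.random() < epsilon:  # explore
                action = random.choice(actions)
            else:                          # exploit
                action = max(actions, key=lambda a: Q.get((state, a), 0.0))
            next_state, reward, done = env.step(action)
            best_next = max((Q.get((next_state, a), 0.0)
                             for a in env.legal_actions(next_state)),
                            default=0.0)
            # Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
            td_target = reward + gamma * best_next * (not done)
            Q[(state, action)] = Q.get((state, action), 0.0) + \
                alpha * (td_target - Q.get((state, action), 0.0))
            state = next_state
    return Q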
import os
import sys
import json
import numpy as np
import pandas as pd
from Utils import Utils
from RL import RL
from random import randint

if sys.argv[1] and sys.argv[2]:
    rounds = int(sys.argv[2])
    season = sys.argv[1].lower()
    model_file = season + "_Q.txt"
    utils = Utils()
    rl = RL()
    items_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), '..', 'shopping/items.csv'))
    items = pd.read_csv(items_path)
    Q_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), '..',
                     'shopping/models/' + model_file))
    Q = utils.load_Q(Q_path)
    Q = np.asmatrix(Q)
    steps = rl.run(rounds, Q)
    purchased_items = []
import numpy as np
import pandas as pd
import json
from Utils import Utils
from RL import RL

utils = Utils()
rl = RL()
items = pd.read_csv("./items.csv")
print(items)
Q = utils.load_Q("./models/spring_Q.txt")
Q = np.asmatrix(Q)
steps = rl.run(7, Q)

purchased_items = {"items": []}
for step in steps:
    purchased_items['items'].append(items.loc[step])
df = pd.DataFrame(purchased_items)
df.to_json("response2.json")
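# A minimal sketch of what rl.run(rounds, Q) above is assumed to do with the
# loaded Q matrix: starting from some state, repeatedly pick the highest-value
# action (here, an item index) and treat it as the next state. This is an
# illustrative guess at the interface, not the RL class's actual code.
import numpy as np

def run_sketch(rounds, Q, start_state=0):
    steps = []
    state = start_state
    for _ in range(rounds):
        action = int(np.argmax(Q[state]))  # greedy choice from row `state`
        steps.append(action)
        state = action                     # the chosen item becomes the next state
    return steps

# Example: a 4x4 Q matrix over 4 items
steps = run_sketch(3, np.asmatrix(np.random.rand(4, 4)))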