Code Example #1
File: RLAgent.py  Project: a3madkour/game_ai_final
    def initialize(self, game_data, player_num):
        # Initializing the command center, the simulator, and some other things
        # print("Initializing")
        self.input_key = self.gateway.jvm.struct.Key()
        self.frame_data = self.gateway.jvm.struct.FrameData()
        self.cc = self.gateway.jvm.aiinterface.CommandCenter()
        self.simulate_time = 60
        self.player_num = player_num
        self.game_data = game_data
        self.simulator = self.game_data.getSimulator()
        # print("making the state")
        self.state = State(self.gateway, game_data, self.cc, player_num)
        self.is_game_just_started = True
        # print("making the action")
        self.current_action = ActionValue(0, 0, 0)
        # print("action is okay!")
        self.weight_path_p1 = None
        self.weight_path_p2 = None
        self.in_behaviour = False

        self.my_last_hp = 0
        self.op_last_hp = 0

        # print("creating the agent")

        # print(self.gateway)
        # print(self.state)
        # print(self.epsilon)
        # print(self.gamma)
        # print(self.alpha)
        # print(self.lamb)
        # print("state stuff")
        # print(self.state.features_num)
        # print(self.state.player_num)
        # print(self.use_exp_replay)

        self.agent = RL(self.gateway, self.state, self.epsilon, self.gamma,
                        self.alpha, self.lamb, self.state.features_num,
                        self.state.player_num, self.use_exp_replay)

        self.action_air = []

        self.action_ground = []

        self.sp_skill = self.ACTION.STAND_D_DF_FC

        self.my_motion_data = self.game_data.getMotionData(self.player_num)
        self.op_motion_data = self.game_data.getMotionData(not self.player_num)
        self.my_actions = []
        self.op_actions = []

        self.agent.epsilon = self.epsilon

        print(self.weights_path)
        print(self.agent.actions_weights)

        self.SetWeights()

        return 0
Code Example #2
    def __init__(self, outfile, mode=True):
        # A process descriptor array PCB[16]
        # A resource descriptor array RCB[4] with multiunit resources
        # RCB[0] and RCB[1] have 1 unit each; RCB[2] has 2 units; RCB[3] has 3 units
        #   A 3-level RL
        self.PCBIndex = PCBIndex()
        self.RL = RL()
        self.RCBIndex = RCBIndex()
        self.n = SIZE
        self.mode = mode
        if self.mode:
            self.output = outfile  # o
        # this is the process at the head of RL

        # Erase all previous contents of the data structures PCB, RCB, RL
        self.start()
Code Example #3
def update():
    for episode in range(epoch):
        current_state = env.reset()
        count = 0
        while True:
            env.render()
            # increment the step counter
            count += 1

            # Sarsa learning: take the action to get the reward and the new state, choose the
            # next action epsilon-greedily, use that action's Q-value, then update the Q-table
            if count == 1:
                action = RL.choose_action(str(current_state))
            new_state, reward, finished_or_not = env.step(action, count, RL)
            new_action = RL.choose_action(str(new_state))
            RL.learn(str(current_state), action, reward, str(new_state),
                     new_action)
            action = new_action

            # Q-learning: take the action to get the reward and the new state, use the max Q-value of the new state, then update the Q-table
            # action = RL.choose_action(str(current_state))
            # new_state, reward, finished_or_not = env.step(action, count, RL)
            # RL.learn(str(current_state), action, reward, str(new_state))

            current_state = new_state
            if finished_or_not:
                if (episode == epoch - 1):
                    env.diaplay(RL.q_table)
                    print(env.get_outcome())
                    with open(RL.__class__.__name__ + "5.txt", "w") as f:
                        for i in env.get_outcome():
                            f.write(str(i) + "\n")
                    # the with-block closes the file automatically

                    time.sleep(1)  # sleep() needs a duration in seconds; 1 s is an arbitrary choice
                break
    env.destroy()
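The loop above calls RL.choose_action and RL.learn, but the RL object itself is not part of the snippet. As a point of reference, a minimal tabular Sarsa learner consistent with those calls might look like the sketch below; the pandas-backed Q-table, the class name SarsaTable, and the hyperparameter defaults are illustrative assumptions, not the project's actual RL class.

import numpy as np
import pandas as pd


class SarsaTable:
    """Minimal tabular Sarsa learner; an illustrative sketch, not the project's RL class."""

    def __init__(self, actions, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9):
        self.actions = list(actions)   # action identifiers (e.g. integers)
        self.lr = learning_rate        # step size alpha
        self.gamma = reward_decay      # discount factor
        self.epsilon = e_greedy        # probability of acting greedily
        self.q_table = pd.DataFrame(columns=self.actions, dtype=np.float64)

    def _check_state_exist(self, state):
        # lazily add an all-zero row for states we have never seen
        if state not in self.q_table.index:
            self.q_table.loc[state] = [0.0] * len(self.actions)

    def choose_action(self, state):
        self._check_state_exist(state)
        if np.random.uniform() < self.epsilon:
            row = self.q_table.loc[state]
            # act greedily, breaking ties between equally good actions at random
            return np.random.choice(row[row == row.max()].index)
        return np.random.choice(self.actions)

    def learn(self, s, a, r, s_, a_):
        # on-policy update: the target uses the action that will actually be taken next
        self._check_state_exist(s_)
        q_predict = self.q_table.loc[s, a]
        q_target = r + self.gamma * self.q_table.loc[s_, a_]
        self.q_table.loc[s, a] += self.lr * (q_target - q_predict)

Replacing the target with r + self.gamma * self.q_table.loc[s_].max() and dropping the a_ argument gives the off-policy Q-learning variant described in the commented-out block above.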
Code Example #4
 def removeFromRL(self, ready_list: RL, to_be_deleted_id: int):
     ready_list.remove(j=to_be_deleted_id)
     return ready_list
Code Example #5
mode = "CPU"
runOnGPU = len(sys.argv)>1      #If a console parameter is received run in GPU. else run on CPU
if(runOnGPU):
    mode = "GPU"


learning_rate = 0.00001
epsilon = 0.05
discount = 0.95
_lambda = 0.80
trainEpisodes = 20

#Number of episodes to run before displaying learning stats
display_frequency = 10

AI = RL(learning_rate, epsilon, discount, _lambda, display_frequency)

CPUfile = Path("netCPU.pt")
GPUfile = Path("netGPU.pt")

if (runOnGPU and GPUfile.is_file()) or (not runOnGPU and CPUfile.is_file()):
    print("Loaded Network", mode)
    print("Learning...")
    if runOnGPU:
        print("Implement for GPU")
        #AI.approximator = torch.load("netGPU.pt")
        #AI.QLearningGPU(trainEpisodes)
        #torch.save(AI.approximator, "netGPU.pt")
    else:
        AI.approximator = torch.load("netCPU.pt")
        AI.QLearning(trainEpisodes)
Code Example #6
 def __init__(self, actions, epsilon, alpha=0.2, gamma=1.0):
     RL.__init__(self, actions, epsilon, alpha, gamma)
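Code example #6 only forwards its arguments to a base class that does not appear on this page. A minimal base class consistent with that RL.__init__(self, actions, epsilon, alpha, gamma) call could look like the following; this is an illustrative assumption rather than the project's actual RL implementation.

import random
from collections import defaultdict


class RL:
    """Illustrative tabular RL base class; only assumed to match the constructor call in code example #6."""

    def __init__(self, actions, epsilon, alpha=0.2, gamma=1.0):
        self.actions = list(actions)   # available action identifiers
        self.epsilon = epsilon         # exploration rate for epsilon-greedy selection
        self.alpha = alpha             # learning rate
        self.gamma = gamma             # discount factor
        self.q = defaultdict(float)    # Q[(state, action)] -> estimated value

    def getQ(self, state, action):
        return self.q[(state, action)]

    def choose_action(self, state):
        # epsilon-greedy: explore with probability epsilon, otherwise act greedily
        if random.random() < self.epsilon:
            return random.choice(self.actions)
        return max(self.actions, key=lambda a: self.getQ(state, a))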
Code Example #7
class Manager:
    def __init__(self, outfile, mode=True):
        # A process descriptor array PCB[16]
        # A resource descriptor array RCB[4] with multiunit resources
        # RCB[0] and RCB[1] have 1 unit each; RCB[2] has 2 units; RCB[3] has 3 units
        #   A 3-level RL
        self.PCBIndex = PCBIndex()
        self.RL = RL()
        self.RCBIndex = RCBIndex()
        self.n = SIZE
        self.mode = mode
        if self.mode:
            self.output = outfile  # o
        # this is the process at the head of RL

        # Erase all previous contents of the data structures PCB, RCB, RL
        self.start()

    """
    start(): The init function should always perform the following tasks:
                Create a single running process at PCB[0] with priority 0
                Enter the process into the RL at the lowest-priority level 0
                self.PCB._process_list[0] = Process(pid=0, prio=0)
    """

    def start(self):

        self.PCBIndex.start()
        self.runningid = 0
        self.RL.enqueue(pid=0, priority=0)
        if self.mode:
            self.output.write("\n")
        self.scheduler()

    """
    create(): a new process is created once and is given a unique id, a parent, and a priority
                a context switch is run in case the new process has a higher priority
    """

    def create(self, priority: int):
        if (1 <= priority <= 2):

            if self.PCBIndex.getCurrent() >= self.n:
                self.displayError()
            else:
                pid = self.PCBIndex.create(priority, parent=self.runningid)
                parent_index = self.PCBIndex.find(self.runningid)
                self.PCBIndex.accessProcessChild(parent_index, pid)
                self.RL.enqueue(pid=pid, priority=priority)
                # print("process", pid, "created")
                # print(pid)
                self.scheduler()
        else:
            self.displayError()

    """
    destroy(): removes a process and its children from the program. This includes deleting its PCB,
                removing it from the ReadyList, releasing any resources it holds, and cleansing the
                resources' WaitLists
    """

    def destroy(self, j):
        if j == 0:  # EXCEPTION - attempt to delete process 0
            self.displayError()
        elif self.PCBIndex.checkIsValid(parent_id=self.runningid, pid=j):
            # find the set of children process may have
            total, set_of_children = self.PCBIndex.remove(
                self.runningid, j)  # for all k in j destroy(k)
            # destroy children
            for pid in set_of_children:
                tbd_index = self.PCBIndex.find(pid)

                a = self.PCBIndex.process_list[tbd_index].resources
                while a is not None:
                    self.releaseInDestroy(a.value[0], pid)
                    a = a.next
                if self.PCBIndex.exists(pid):
                    self.PCBIndex.removeFromParent(
                        self.PCBIndex.process_list[tbd_index].getParent(), pid)
                self.RL.remove(decision=False, j=pid)
                self.PCBIndex.process_list[tbd_index] = None

            # destroy j - remove j from its parent, ready list,
            tbd_index = self.PCBIndex.find(j)
            a = self.PCBIndex.process_list[tbd_index].resources
            if self.PCBIndex.exists(j):
                self.PCBIndex.removeFromParent(
                    self.PCBIndex.process_list[tbd_index].getParent(), j)
            while a is not None:
                self.releaseInDestroy(a.value[0], j)
                a = a.next
            self.RL.remove(decision=False, j=j)
            self.PCBIndex.process_list[tbd_index] = None

            # run through RCBs waitinglist to remove possible waiting processes
            alist = list(set_of_children)
            alist.append(j)
            self.RCBIndex.cleanseWaitlist(alist)

            # fix PCBindex in case there are any gaps
            self.PCBIndex.reshuffle()

            self.scheduler()

        else:  # EXCEPTION - attempt to delete a process that is not there
            self.displayError()

    """
    request(): the running process makes an attempt to request a number of units from a given resource
                if the resource has the number of requested units available, it is allocated to the running process
                if the resource does not have the units available, the running process is removed from the ReadyList,
                its state is set to Blocked, and is placed on the requested resource's WaitList
    """

    def request(self, resource, numUnits):
        if resource < 0 or resource > 3:
            self.displayError()
            return
        refResource = self.RCBIndex.getResource(resource)

        if self.runningid == 0:  # EXCEPTION - requesting resource for process 0
            self.displayError()
        elif numUnits == 0:  # requesting 0 units is not an exception but it does nothing
            self.scheduler()
            return  # this refers to piazza post: https://piazza.com/class/kffw9nufy3m5qf?cid=35
        elif refResource == -1:  # EXCEPTION - nonexistent resource
            self.displayError()
        elif refResource.state >= numUnits:
            value = self.PCBIndex.accessProcessHasResource(
                process_index=self.PCBIndex.find(self.runningid), rid=resource)
            if value != -1:  # has resource and can append it
                self.PCBIndex.appendProcessResource(
                    process_index=self.PCBIndex.find(self.runningid),
                    rid=resource,
                    numUnits=numUnits)
                self.RCBIndex.setResource(rid=resource,
                                          k=numUnits,
                                          change=True)
                self.scheduler()
            else:  # doesn't have the resource, so add it to its resource list

                self.RCBIndex.setResource(rid=resource,
                                          k=numUnits,
                                          change=True)
                parent_index = self.PCBIndex.find(self.runningid)
                self.PCBIndex.addProcessResource(process_index=parent_index,
                                                 rid=resource,
                                                 numUnits=numUnits)
                self.scheduler()
                # print("received", numUnits, "units")
        else:
            value = self.PCBIndex.accessProcessHasResource(
                process_index=self.PCBIndex.find(self.runningid), rid=resource)
            if value != -1:
                # the process already holds this resource; add the preexisting units to the requested numUnits
                if numUnits > refResource.inventory:  # EXCEPTION - process is requesting more than what a resource initially has
                    self.displayError()
                elif (value + numUnits) > refResource.inventory:  # EXCEPTION - process is requesting more than what a resource has
                    self.displayError()
                else:
                    parent_index = self.PCBIndex.find(self.runningid)
                    self.PCBIndex.accessProcessState(
                        process_index=parent_index, state=0)
                    self.RL.remove(decision=True, j=self.runningid)
                    self.RCBIndex.enqueue(rid=resource,
                                          i=self.runningid,
                                          k=numUnits)
                    self.scheduler()
            else:
                if numUnits > refResource.inventory:  # EXCEPTION - process is requesting more than what a resource initially has
                    self.displayError()
                    return
                parent_index = self.PCBIndex.find(self.runningid)
                self.PCBIndex.accessProcessState(process_index=parent_index,
                                                 state=0)
                self.RL.remove(decision=True, j=self.runningid)
                self.RCBIndex.enqueue(rid=resource,
                                      i=self.runningid,
                                      k=numUnits)
                self.scheduler()

    """
    release(): releases the desired resources 
    """

    def release(self, resource, number):
        # check if resource is allocated to running process
        if resource < 0 or resource > 3:
            self.displayError()
            return

        value = self.PCBIndex.accessProcessHasResource(
            process_index=self.PCBIndex.find(self.runningid), rid=resource)

        if value == -1 or value < number:  # EXCEPTION - Releasing a resource the process is not holding
            self.displayError()
        elif number == 0:  # not exception, but attempt to release 0 units does nothing
            self.scheduler()
            return
        else:
            # remove r from resources list of process i
            amount_to_add = self.PCBIndex.releaseResource(
                self.PCBIndex.find(self.runningid), resource, number)
            # add amount released back into resource
            self.RCBIndex.setResource(resource, amount_to_add, False)
            refResource = self.RCBIndex.getResource(resource)
            if refResource.waiting_list is None:
                pass
            else:
                # look at one entry of the waiting list at a time
                while self.RCBIndex.peek(resource):

                    pid, amount = self.RCBIndex.dequeue(resource)
                    process_index = self.PCBIndex.find(pid)
                    self.PCBIndex.accessProcessState(process_index, 1)
                    self.RCBIndex.setResource(resource, amount, True)
                    # check if process has the resource
                    if self.PCBIndex.accessProcessHasResource(
                            process_index, resource) != -1:
                        self.PCBIndex.appendProcessResource(
                            process_index, resource, amount)
                    else:
                        self.PCBIndex.addProcessResource(
                            process_index, resource, amount)

                    self.RL.enqueue(
                        pid,
                        self.PCBIndex.process_list[process_index].getPriority(
                        ))
            # print("resource", str(resource), "released")
            self.scheduler()

    """
    timeout(): performs a purposeful context switch
    """

    def timeout(self):
        # moves running process to the end of their priority level
        # calls scheduler at the end
        self.RL.timeout()
        self.scheduler()

    """
    scheduler(): retrieves the highest priority process 
                used for context switches
    """

    def scheduler(self):
        # if (self.RL.findHead() != -1):
        # print("process " + str(self.RL.getHead()) + " is running")
        # print(str(self.RL.getHead()))
        self.RL.findHead()
        self.runningid = self.RL.getHead()
        if self.mode:
            self.output.write(str(self.runningid) + " ")
        else:
            print(str(self.RL.getHead()))

    """
    Helper Functions: functions that are not explicitly required but are needed to run required ones
    """

    def displayError(self):
        if self.mode:
            self.output.write(str(-1) + " ")
        else:
            print(-1)

    def releaseInDestroy(self, resource, from_pid):
        value = self.PCBIndex.accessProcessHasResource(
            process_index=self.PCBIndex.find(from_pid), rid=resource)

        if value == -1:  # Exceptions: Releasing a resource the process is not holding
            # print(-1)
            self.displayError()
        else:
            amount_to_add = self.PCBIndex.releaseAllResource(
                self.PCBIndex.find(from_pid),
                resource)  # remove r from resources list of process i
            self.RCBIndex.setResource(
                resource, amount_to_add,
                False)  # add amount released back into resource
            refResource = self.RCBIndex.getResource(resource)
            if refResource.waiting_list is None:
                pass
            else:
                pid, amount = self.RCBIndex.dequeue(resource)
                process_index = self.PCBIndex.find(pid)
                if process_index != -1 and process_index is not None:
                    self.PCBIndex.accessProcessState(process_index, 1)
                    self.RCBIndex.setResource(resource, amount, True)
                    # check if process has the resource
                    if self.PCBIndex.accessProcessHasResource(
                            process_index, resource) != -1:
                        self.PCBIndex.appendProcessResource(
                            process_index, resource, amount)
                    else:
                        self.PCBIndex.addProcessResource(
                            process_index, resource, amount)

                    self.RL.enqueue(
                        pid,
                        self.PCBIndex.process_list[process_index].getPriority(
                        ))
            # print("resource", str(resource), "released")
            # self.scheduler()

    """
    DEBUG Functions
    """

    def printRL(self):
        node = self.RL.readylist
        print("printRL")
        for i in range(0, 3):
            print("\nin " + str(i), end=" ")
            node = self.RL.readylist[i]
            while node is not None:
                print(str(node.value) + "->", end='')
                node = node.next
        print()

    def printPCB(self):
        print("print PCB")
        for i in range(self.PCBIndex.getCurrent()):
            print("Id: " + str(self.PCBIndex.process_list[i]))

    def printRCB(self):
        print("print RCB")
        for i in range(0, 4):
            print("Id: " + str(self.RCBIndex.resource_list[i]))
Code Example #8
#Learning parameters
batch_size = 64
learning_rate = 0.000001
initial_epsilon = 0.25
epsilon_decay = 0.999997        #This decay value achieves 0.97 in episode 10,000, 0.74 in episode 100,000, 0.05 in episode 1,000,000
discount = 0.95
trainEpisodes = 40000
experience_stored = 1000000
step_delta = 1000


#Number of episodes to run before displaying learning stats
display_frequency = 10

AI = RL(batch_size, learning_rate, initial_epsilon, epsilon_decay, discount, experience_stored, step_delta, display_frequency, runOnGPU)

CPUfile = Path("netCPU.pt")
GPUfile = Path("netGPU.pt")

#Load experience information from previous sessions
AI.approximator.loadExperience("experience.pkl")
if (runOnGPU and GPUfile.is_file()) or (not runOnGPU and CPUfile.is_file()):
    print("Loaded Network", mode)
    print("Learning...")
    if(runOnGPU):
        AI.approximator = torch.load("netGPU.pt")
        AI.QLearningGPU(trainEpisodes)
        torch.save(AI.approximator, "netGPU.pt")
    else:
        AI.approximator = torch.load("netCPU.pt")
Code Example #9
File: RLAgent.py  Project: a3madkour/game_ai_final
class RLAgent(object):

    FRAME_AHEAD = 14
    DEBUG_MODE = True
    epsilon = 0.01
    gamma = 0.95
    alpha = 0.2
    lamb = 0.1
    action_weights_number = 5
    use_exp_replay = False
    weights_path = "pickled_weights.pkl"

    def __init__(self, gateway):
        self.gateway = gateway
        self.my_actions = []
        self.op_actions = []
        self.ACTION = self.gateway.jvm.enumerate.Action

    def close(self):
        pass

    def getInformation(self, frame_data):
        # Load the frame data every time getInformation gets called
        self.state.frame_data = frame_data
        self.frame_data = frame_data
        self.cc.setFrameData(self.frame_data, self.player_num)
        self.state.my_char = self.frame_data.getCharacter(self.player_num)
        self.state.op_char = self.frame_data.getCharacter(not self.player_num)
        self.my_char = self.frame_data.getCharacter(self.player_num)
        self.op_char = self.frame_data.getCharacter(not self.player_num)

    # please define this method when you use FightingICE version 3.20 or later
    def roundEnd(self, x, y, z):
        print("round ending")
        print(self.weights_path)
        f = open(self.weights_path, 'wb')
        print("the file is open")
        print(self.agent.actions_weights)
        print("dumping")
        pickle.dump(self.agent.actions_weights, f)
        f.close()
        print(x)
        print(y)
        print(z)

    # please define this method when you use FightingICE version 4.00 or later
    def getScreenData(self, sd):
        pass

    def initialize(self, game_data, player_num):
        # Initializing the command center, the simulator, and some other things
        # print("Initializing")
        self.input_key = self.gateway.jvm.struct.Key()
        self.frame_data = self.gateway.jvm.struct.FrameData()
        self.cc = self.gateway.jvm.aiinterface.CommandCenter()
        self.simulate_time = 60
        self.player_num = player_num
        self.game_data = game_data
        self.simulator = self.game_data.getSimulator()
        # print("making the state")
        self.state = State(self.gateway, game_data, self.cc, player_num)
        self.is_game_just_started = True
        # print("making the action")
        self.current_action = ActionValue(0, 0, 0)
        # print("action is okay!")
        self.weight_path_p1 = None
        self.weight_path_p2 = None
        self.in_behaviour = False

        self.my_last_hp = 0
        self.op_last_hp = 0

        # print("creating the agent")

        # print(self.gateway)
        # print(self.state)
        # print(self.epsilon)
        # print(self.gamma)
        # print(self.alpha)
        # print(self.lamb)
        # print("state stuff")
        # print(self.state.features_num)
        # print(self.state.player_num)
        # print(self.use_exp_replay)

        self.agent = RL(self.gateway, self.state, self.epsilon, self.gamma,
                        self.alpha, self.lamb, self.state.features_num,
                        self.state.player_num, self.use_exp_replay)

        self.action_air = []

        self.action_ground = []

        self.sp_skill = self.ACTION.STAND_D_DF_FC

        self.my_motion_data = self.game_data.getMotionData(self.player_num)
        self.op_motion_data = self.game_data.getMotionData(not self.player_num)
        self.my_actions = []
        self.op_actions = []

        self.agent.epsilon = self.epsilon

        print(self.weights_path)
        print(self.agent.actions_weights)

        self.SetWeights()

        return 0

    def SetWeights(self):
        try:
            # the weights were pickled in binary mode (see roundEnd), so read with 'rb'
            f = open(self.weights_path, 'rb')
            multi_feat = pickle.load(f)
            f.close()
            self.agent.SetMultipleWeights(multi_feat)
        except Exception:
            # no usable weights file yet: fall back to zero-initialized weights
            multi_feat = []
            for i in range(self.action_weights_number):
                feat = [0.0] * self.state.features_num
                # print(feat)
                multi_feat.append(feat)
            self.agent.SetMultipleWeights(multi_feat)

    def input(self):
        # The input is set up to the global variable input_key
        # which is modified in the processing part
        return self.input_key

    def processing(self):
        # First we check whether we are at the end of the round
        # print("processing ")
        if self.frame_data.getEmptyFlag() or self.frame_data.getRemainingFramesNumber() <= 0:
            # print("in the first if")
            self.is_game_just_started = True
            return
        if not self.is_game_just_started:
            # print("in the second if")
            # Simulate the delay and look ahead 2 frames. The simulator class exists already in FightingICE
            self.frame_data = self.simulator.simulate(self.frame_data,
                                                      self.player_num, None,
                                                      None, self.FRAME_AHEAD)
        else:
            # If the game just started, no point on simulating
            self.is_game_just_started = False
        # print("we got passed the if statements ")
        self.cc.setFrameData(self.frame_data, self.player_num)
        # print("state updating!")
        self.state.Update(self.cc, self.frame_data, self.player_num)
        # print("state updates!")

        # distance = self.frame_data.getDistanceX()
        # energy = my.getEnergy()
        # my_x = my.getX()
        # my_state = my.getState()
        # opp_x = opp.getX()
        # opp_state = opp.getState()
        # xDifference = my_x - opp_x
        # print("starting the second set of ifs")
        if self.cc.getSkillFlag():
            # If there is a previous "command" still in execution, then keep doing it
            self.input_key = self.cc.getSkillKey()
            return
        # We empty the keys and cancel skill just in case
        # print("the if is done!")
        self.input_key.empty()
        self.cc.skillCancel()

        # print("setting actions for the state")

        self.state.SetActions(self.frame_data, self.player_num)

        # print("setting actions for the state done!")

        # print("retrying reward")

        reward = abs(self.op_last_hp - self.state.op_char.getHp()) - abs(
            self.my_last_hp - self.state.my_char.getHp())

        # print("it is the HP isn't it")
        self.my_last_hp = self.state.my_char.getHp()
        self.op_last_hp = self.state.op_char.getHp()

        # print("nope maybe next_action?")
        next_action = self.agent.Update(self.frame_data, reward,
                                        self.current_action.action_weight)
        # print("nope")

        # print('-----------------------------------------------')
        # print(self.agent.actions_weights)

        self.current_action = next_action
        # print("trying to get chosen_action")
        # print(self.state.my_actions)
        # print(self.current_action)
        # print(self.current_action.action_index)
        chosen_action = self.state.my_actions[self.current_action.action_index]
        # print("and success!")

        # print("and now execting the action")
        self.ExecuteOption(chosen_action)

    def ExecuteOption(self, action):
        # print(action)
        if type(action) is str:
            # print("action is a string")
            print("The option picked: ", action)
            action_name = action
        else:
            action_name = action.name()

        selected_action = self.ACTION.NEUTRAL
        # print("starting the ifs")
        if "OPTION" in action_name:
            # print("it is an option")
            if "GUARD" in action_name:
                self.action_air = [self.ACTION.AIR_GUARD]
                # print("done with air")
                self.action_ground = [
                    self.ACTION.DASH, self.ACTION.NEUTRAL, self.ACTION.STAND_A,
                    self.ACTION.CROUCH_B, self.ACTION.THROW_A,
                    self.ACTION.STAND_B, self.ACTION.CROUCH_A
                ]
                # print("done with ground")
                self.op_action_air = [
                    self.ACTION.AIR_B, self.ACTION.AIR_DB, self.ACTION.AIR_FB
                ]
                self.op_action_ground = [
                    self.ACTION.STAND, self.ACTION.DASH, self.ACTION.STAND_A,
                    self.ACTION.CROUCH_B, self.ACTION.STAND_B
                ]
                self.simulate_time = 60

            elif "KICK" in action_name:
                # print("I am kicker")
                self.action_air = [self.ACTION.AIR_GUARD]
                # print("done with air")
                self.action_ground = [
                    self.ACTION.STAND, self.ACTION.DASH,
                    self.ACTION.FORWARD_WALK, self.ACTION.CROUCH_A,
                    self.ACTION.CROUCH_B, self.ACTION.CROUCH_FB,
                    self.ACTION.STAND_D_DB_BB
                ]
                # print("done with ground")
                self.op_action_air = [
                    self.ACTION.AIR_B, self.ACTION.AIR_DB, self.ACTION.AIR_FB
                ]
                # print("done with option air")
                self.op_action_ground = [
                    self.ACTION.STAND, self.ACTION.DASH, self.ACTION.CROUCH_FB
                ]
                # print("done with option ground")
                self.simulate_time = 60

            elif "GRAB" in action_name:
                # print("I am grabber")
                self.action_air = [self.ACTION.AIR]
                # print("done with air")
                self.action_ground = [
                    self.ACTION.FORWARD_WALK, self.ACTION.DASH,
                    self.ACTION.STAND_A, self.ACTION.THROW_A
                ]
                # print("done with ground")
                self.op_action_air = [self.ACTION.AIR]
                # print("done with option air")
                self.op_action_ground = [
                    self.ACTION.STAND, self.ACTION.DASH, self.ACTION.STAND_A
                ]
                # print("done with option ground")
                self.simulate_time = 20

            elif "ANTI-AIR" in action_name:
                # print("I am antiair")

                self.action_air = [self.ACTION.AIR_GUARD]
                # print("done with air")
                self.action_ground = [
                    self.ACTION.FORWARD_WALK, self.ACTION.CROUCH_FA,
                    self.ACTION.STAND_FB
                ]
                # print("done with ground")
                self.op_action_air = [self.ACTION.NEUTRAL]
                # print("done with option air")
                self.op_action_ground = [self.ACTION.NEUTRAL]
                # print("done with option ground")
                self.simulate_time = 20

            elif "ALL_ACTIONS" in action_name:
                self.action_air = [
                    self.ACTION.AIR_GUARD, self.ACTION.AIR_A,
                    self.ACTION.AIR_B, self.ACTION.AIR_DA, self.ACTION.AIR_DB,
                    self.ACTION.AIR_FA, self.ACTION.AIR_FB, self.ACTION.AIR_UA,
                    self.ACTION.AIR_UB, self.ACTION.AIR_D_DF_FA,
                    self.ACTION.AIR_D_DF_FB, self.ACTION.AIR_F_D_DFA,
                    self.ACTION.AIR_F_D_DFB, self.ACTION.AIR_D_DB_BA,
                    self.ACTION.AIR_D_DB_BB
                ]
                # print("done with air")

                self.action_ground = [
                    self.ACTION.STAND_D_DB_BA, self.ACTION.BACK_STEP,
                    self.ACTION.FORWARD_WALK, self.ACTION.DASH,
                    self.ACTION.JUMP, self.ACTION.FOR_JUMP,
                    self.ACTION.BACK_JUMP, self.ACTION.STAND_GUARD,
                    self.ACTION.CROUCH_GUARD, self.ACTION.THROW_A,
                    self.ACTION.THROW_B, self.ACTION.STAND_A,
                    self.ACTION.STAND_B, self.ACTION.CROUCH_A,
                    self.ACTION.CROUCH_B, self.ACTION.STAND_FA,
                    self.ACTION.STAND_FB, self.ACTION.CROUCH_FA,
                    self.ACTION.CROUCH_FB, self.ACTION.STAND_D_DF_FA,
                    self.ACTION.STAND_D_DF_FB, self.ACTION.STAND_F_D_DFA,
                    self.ACTION.STAND_F_D_DFB, self.ACTION.STAND_D_DB_BB
                ]
                # print("done with ground")

                self.op_action_air = self.action_air
                # print("done with option air")
                self.op_action_ground = self.action_ground
                # print("done with option ground")
                self.simulate_time = 60

            self.MCTSPrepare()

            root_node = TreeNode(self.gateway, self.simulator_ahead_frame_data,
                                 None, self.my_actions, self.op_actions,
                                 self.game_data, self.player_num, self.cc)

            best_action = root_node.MCTS()
            print("exeuting: ", best_action.name())
            self.cc.commandCall(best_action.name())

    def MCTSPrepare(self):
        # print(self.FRAME_AHEAD)
        self.simulator_ahead_frame_data = self.simulator.simulate(
            self.frame_data, self.player_num, None, None, self.FRAME_AHEAD)

        self.my_char = self.simulator_ahead_frame_data.getCharacter(
            self.player_num)
        self.op_char = self.simulator_ahead_frame_data.getCharacter(
            not self.player_num)

        # print("Getting my actions")
        self.SetMyAction()
        # print("Getting op actions")
        self.SetOpAction()

    def SetMyAction(self):

        # print("clearing my actions")

        self.my_actions = []

        # print("getting eneregy")

        energy = self.my_char.getEnergy()

        #actions.add(self.gateway.jvm.enumerate.Action.)

        # print("checkig if AIR ")
        if str(self.my_char.getState()) == "AIR":
            # print("start of the for loop")
            for i in range(len(self.action_air)):
                # print("checking if we have enough energy")
                if abs(self.my_motion_data[
                        self.gateway.jvm.enumerate.Action.valueOf(
                            self.action_air[i].name()).ordinal()].
                       getAttackStartAddEnergy()) <= energy:
                    self.my_actions.append(self.action_air[i])
        else:
            # print("we are not in the air ")
            # print("checking the motion stuff")
            move_index = self.gateway.jvm.enumerate.Action.valueOf(
                self.sp_skill.name()).ordinal()
            # print("trying motion data: ",abs(self.my_motion_data[move_index].getAttackStartAddEnergy()))
            if abs(self.my_motion_data[move_index].getAttackStartAddEnergy()
                   ) <= energy:
                # print("the if worked")
                self.my_actions.append(self.sp_skill)
                # print("so did the append!")

            for i in range(len(self.action_ground)):
                if abs(self.my_motion_data[
                        self.gateway.jvm.enumerate.Action.valueOf(
                            self.action_ground[i].name()).ordinal()].
                       getAttackStartAddEnergy()) <= energy:
                    self.my_actions.append(self.action_ground[i])

    def SetOpAction(self):

        self.op_actions = []

        energy = self.op_char.getEnergy()

        if str(self.op_char.getState()) == "AIR":
            for i in range(len(self.action_air)):
                if abs(self.op_motion_data[
                        self.gateway.jvm.enumerate.Action.valueOf(
                            self.action_air[i].name()).ordinal()].
                       getAttackStartAddEnergy()) <= energy:
                    self.op_actions.append(self.action_air[i])
        else:
            if abs(self.op_motion_data[self.gateway.jvm.enumerate.Action.
                                       valueOf(self.sp_skill.name()).ordinal(
                                       )].getAttackStartAddEnergy()) <= energy:
                self.op_actions.append(self.sp_skill)

            for i in range(len(self.action_ground)):
                if abs(self.op_motion_data[
                        self.gateway.jvm.enumerate.Action.valueOf(
                            self.action_ground[i].name()).ordinal()].
                       getAttackStartAddEnergy()) <= energy:
                    self.op_actions.append(self.action_ground[i])

    class Java:
        implements = ["aiinterface.AIInterface"]
Code Example #10
def main(args):
    os.environ['KMP_WARNINGS'] = '0'
    torch.cuda.manual_seed_all(1)
    np.random.seed(0)

    # filter array
    num_features = [
        args.features * i
        for i in range(1, args.levels + 2 + args.levels_without_sample)
    ]

    # determine the output size
    target_outputs = int(args.output_size * args.sr)
    # model configuration parameters are saved only when training

    # set the hyperparameters for the teacher, the student, and student_for_backward

    student_KD = Waveunet(args.channels,
                          num_features,
                          args.channels,
                          levels=args.levels,
                          encoder_kernel_size=args.encoder_kernel_size,
                          decoder_kernel_size=args.decoder_kernel_size,
                          target_output_size=target_outputs,
                          depth=args.depth,
                          strides=args.strides,
                          conv_type=args.conv_type,
                          res=args.res)
    KD_optimizer = Adam(params=student_KD.parameters(), lr=args.lr)
    print(25 * '=' + 'model setting' + 25 * '=')
    print('student_KD: ', student_KD.shapes)
    if args.cuda:
        student_KD = utils.DataParallel(student_KD)
        print("move student_KD to gpu\n")
        student_KD.cuda()

    state = {"step": 0, "worse_epochs": 0, "epochs": 0, "best_pesq": -np.Inf}
    if args.load_model is not None:
        print("Continuing full model from checkpoint " + str(args.load_model))
        state = utils.load_model(student_KD, KD_optimizer, args.load_model,
                                 args.cuda)
    dataset = get_folds(args.dataset_dir, args.outside_test)
    log_dir, checkpoint_dir, result_dir = utils.mkdir_and_get_path(args)
    # print(model)
    if args.test is False:
        writer = SummaryWriter(log_dir)
        # set hyperparameters
        # print hyperparameter info

        with open(os.path.join(log_dir, 'config.json'), 'w') as f:
            json.dump(args.__dict__, f, indent=5)
        print('saving commandline_args')

        if args.teacher_model is not None:
            print(25 * '=' + 'printing hyperparameters info' + 25 * '=')
            print(f'KD_method  = {args.KD_method}')
            teacher_num_features = [
                24 * i
                for i in range(1, args.levels + 2 + args.levels_without_sample)
            ]
            teacher_model = Waveunet(
                args.channels,
                teacher_num_features,
                args.channels,
                levels=args.levels,
                encoder_kernel_size=args.encoder_kernel_size,
                decoder_kernel_size=args.decoder_kernel_size,
                target_output_size=target_outputs,
                depth=args.depth,
                strides=args.strides,
                conv_type=args.conv_type,
                res=args.res)

            student_copy = Waveunet(
                args.channels,
                num_features,
                args.channels,
                levels=args.levels,
                encoder_kernel_size=args.encoder_kernel_size,
                decoder_kernel_size=args.decoder_kernel_size,
                target_output_size=target_outputs,
                depth=args.depth,
                strides=args.strides,
                conv_type=args.conv_type,
                res=args.res)
            copy_optimizer = Adam(params=student_copy.parameters(), lr=args.lr)

            student_copy2 = Waveunet(
                args.channels,
                num_features,
                args.channels,
                levels=args.levels,
                encoder_kernel_size=args.encoder_kernel_size,
                decoder_kernel_size=args.decoder_kernel_size,
                target_output_size=target_outputs,
                depth=args.depth,
                strides=args.strides,
                conv_type=args.conv_type,
                res=args.res)
            copy2_optimizer = Adam(params=student_copy2.parameters(),
                                   lr=args.lr)

            policy_network = RL(n_inputs=2,
                                kernel_size=6,
                                stride=1,
                                conv_type=args.conv_type,
                                pool_size=4)
            PG_optimizer = Adam(params=policy_network.parameters(),
                                lr=args.RL_lr)
            if args.cuda:
                teacher_model = utils.DataParallel(teacher_model)
                policy_network = utils.DataParallel(policy_network)
                student_copy = utils.DataParallel(student_copy)
                student_copy2 = utils.DataParallel(student_copy2)
                # print("move teacher to gpu\n")
                teacher_model.cuda()
                # print("student_copy  to gpu\n")
                student_copy.cuda()
                # print("student_copy2  to gpu\n")
                student_copy2.cuda()
                # print("move policy_network to gpu\n")
                policy_network.cuda()
            student_size = sum(p.numel() for p in student_KD.parameters())
            teacher_size = sum(p.numel() for p in teacher_model.parameters())
            print('student_parameter count: ', str(student_size))
            print('teacher_model_parameter count: ', str(teacher_size))
            print('RL_parameter count: ',
                  str(sum(p.numel() for p in policy_network.parameters())))
            print(f'compression ratio: {100*(student_size/teacher_size)}%')
            if args.teacher_model is not None:
                print("load teacher model" + str(args.teacher_model))
                _ = utils.load_model(teacher_model, None, args.teacher_model,
                                     args.cuda)
                teacher_model.eval()

            if args.load_RL_model is not None:
                print("Continuing full RL_model from checkpoint " +
                      str(args.load_RL_model))
                _ = utils.load_model(policy_network, PG_optimizer,
                                     args.load_RL_model, args.cuda)

        # If there is no data augmentation, at least crop targets to fit the model output shape
        crop_func = partial(crop, shapes=student_KD.shapes)
        ### DATASET
        train_data = SeparationDataset(dataset,
                                       "train",
                                       args.sr,
                                       args.channels,
                                       student_KD.shapes,
                                       False,
                                       args.hdf_dir,
                                       audio_transform=crop_func)
        val_data = SeparationDataset(dataset,
                                     "test",
                                     args.sr,
                                     args.channels,
                                     student_KD.shapes,
                                     False,
                                     args.hdf_dir,
                                     audio_transform=crop_func)
        dataloader = torch.utils.data.DataLoader(
            train_data,
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.num_workers,
            worker_init_fn=utils.worker_init_fn,
            pin_memory=True)

        # Set up the loss function
        if args.loss == "L1":
            criterion = nn.L1Loss()
        elif args.loss == "L2":
            criterion = nn.MSELoss()
        else:
            raise NotImplementedError("Couldn't find this loss!")
        My_criterion = customLoss()

        ### TRAINING START
        print('TRAINING START')
        if state["epochs"] > 0:
            state["epochs"] = state["epochs"] + 1
        batch_num = (len(train_data) // args.batch_size)

        if args.teacher_model is not None:
            counting = 0
            PG_lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(
                optimizer=PG_optimizer, gamma=args.decayRate)

            while counting < state["epochs"]:
                PG_optimizer.zero_grad()
                PG_optimizer.step()
                counting += 1
                PG_lr_scheduler.step()
            # print(f'modify lr RL rate : {counting} , until : {state["epochs"]}')
        while state["epochs"] < 100:
            memory_alpha = []
            print("epoch:" + str(state["epochs"]))

            # monitor_value
            total_avg_reward = 0
            total_avg_scalar_reward = 0
            avg_origin_loss = 0
            all_avg_KD_rate = 0
            same = 0
            with tqdm(total=len(dataloader)) as pbar:
                for example_num, (x, targets) in enumerate(dataloader):
                    # if example_num==20:
                    #     break
                    student_KD.train()

                    if args.cuda:
                        x = x.cuda()
                        targets = targets.cuda()
                    if args.teacher_model is not None:
                        student_copy.train()
                        student_copy2.train()
                        # Set LR for this iteration
                        temp = {'state_dict': None, 'optim_dict': None}

                        temp['state_dict'] = copy.deepcopy(
                            student_KD.state_dict())
                        temp['optim_dict'] = copy.deepcopy(
                            KD_optimizer.state_dict())
                        #print('base_model from KD')

                        student_KD.load_state_dict(temp['state_dict'])
                        KD_optimizer.load_state_dict(temp['optim_dict'])

                        student_copy.load_state_dict(temp['state_dict'])
                        copy_optimizer.load_state_dict(temp['optim_dict'])

                        student_copy2.load_state_dict(temp['state_dict'])
                        copy2_optimizer.load_state_dict(temp['optim_dict'])

                        utils.set_cyclic_lr(KD_optimizer, example_num,
                                            len(train_data) // args.batch_size,
                                            args.cycles, args.min_lr, args.lr)
                        utils.set_cyclic_lr(copy_optimizer, example_num,
                                            len(train_data) // args.batch_size,
                                            args.cycles, args.min_lr, args.lr)
                        utils.set_cyclic_lr(copy2_optimizer, example_num,
                                            len(train_data) // args.batch_size,
                                            args.cycles, args.min_lr, args.lr)
                        # forward student and teacher  get output
                        student_KD_output, avg_student_KD_loss = utils.compute_loss(
                            student_KD,
                            x,
                            targets,
                            criterion,
                            compute_grad=False)
                        teacher_output, _ = utils.compute_loss(
                            teacher_model,
                            x,
                            targets,
                            criterion,
                            compute_grad=False)
                        # PG_state
                        diff_from_target = targets.detach(
                        ) - student_KD_output.detach()
                        diff_from_teacher = teacher_output.detach(
                        ) - student_KD_output.detach()
                        PG_state = torch.cat(
                            (diff_from_target, diff_from_teacher), 1)

                        # forward RL get alpha
                        alpha = policy_network(PG_state)
                        nograd_alpha = alpha.detach()

                        avg_KD_rate = torch.mean(nograd_alpha).item()
                        all_avg_KD_rate += avg_KD_rate / batch_num

                        KD_optimizer.zero_grad()
                        KD_outputs, KD_hard_loss, KD_loss, KD_soft_loss = utils.KD_compute_loss(
                            student_KD,
                            teacher_model,
                            x,
                            targets,
                            My_criterion,
                            alpha=nograd_alpha,
                            compute_grad=True,
                            KD_method=args.KD_method)
                        KD_optimizer.step()

                        copy_optimizer.zero_grad()
                        _, _, _, _ = utils.KD_compute_loss(
                            student_copy,
                            teacher_model,
                            x,
                            targets,
                            My_criterion,
                            alpha=1,
                            compute_grad=True,
                            KD_method=args.KD_method)
                        copy_optimizer.step()

                        copy2_optimizer.zero_grad()
                        _, _, _, _ = utils.KD_compute_loss(
                            student_copy2,
                            teacher_model,
                            x,
                            targets,
                            My_criterion,
                            alpha=0,
                            compute_grad=True,
                            KD_method=args.KD_method)
                        copy2_optimizer.step()

                        # calculate backwarded model MSE
                        backward_KD_loss = utils.loss_for_sample(
                            student_KD, x, targets)
                        backward_copy_loss = utils.loss_for_sample(
                            student_copy, x, targets)
                        backward_copy2_loss = utils.loss_for_sample(
                            student_copy2, x, targets)

                        # calculate rewards
                        rewards, same_num, before_decay = utils.get_rewards(
                            backward_KD_loss.detach(),
                            backward_copy_loss.detach(),
                            backward_copy2_loss.detach(),
                            backward_KD_loss.detach(), len(train_data),
                            state["epochs"] + 1)
                        same += same_num
                        rewards = rewards.detach()
                        avg_origin_loss += avg_student_KD_loss / batch_num

                        # avg_reward
                        avg_reward = torch.mean(rewards)
                        avg_scalar_reward = torch.mean(torch.abs(rewards))
                        total_avg_reward += avg_reward.item() / batch_num
                        total_avg_scalar_reward += avg_scalar_reward.item(
                        ) / batch_num
                        # append to memory_alpha
                        nograd_alpha = nograd_alpha.detach().cpu()
                        memory_alpha.append(nograd_alpha.numpy())

                        PG_optimizer.zero_grad()
                        _ = utils.RL_compute_loss(alpha, rewards, nn.MSELoss())
                        PG_optimizer.step()
                        # print info
                        # print(f'avg_KD_rate                 = {avg_KD_rate} ')
                        # print(f'student_KD_loss             = {avg_student_KD_loss}')
                        # print(f'backward_student_copy_loss  = {np.mean(backward_copy_loss.detach().cpu().numpy())}')
                        # print(f'backward_student_KD_loss    = {np.mean(backward_KD_loss.detach().cpu().numpy())}')
                        # print(f'backward_student_copy2_loss = {np.mean(backward_copy2_loss.detach().cpu().numpy())}')
                        # print(f'avg_reward                  = {avg_reward}')
                        # print(f'total_avg_reward            = {total_avg_reward}')
                        # print(f'same                        = {same}')

                        # add to tensorboard
                        writer.add_scalar("student_KD_loss",
                                          avg_student_KD_loss, state["step"])
                        writer.add_scalar(
                            "backward_student_KD_loss",
                            np.mean(backward_KD_loss.detach().cpu().numpy()),
                            state["step"])
                        writer.add_scalar("KD_loss", KD_loss, state["step"])
                        writer.add_scalar("KD_hard_loss", KD_hard_loss,
                                          state["step"])
                        writer.add_scalar("KD_soft_loss", KD_soft_loss,
                                          state["step"])
                        writer.add_scalar("avg_KD_rate", avg_KD_rate,
                                          state["step"])
                        writer.add_scalar("rewards", avg_reward, state["step"])
                        writer.add_scalar("scalar_rewards", avg_scalar_reward,
                                          state["step"])
                        writer.add_scalar("before_decay", before_decay,
                                          state["step"])
                    else:  # no KD training
                        utils.set_cyclic_lr(KD_optimizer, example_num,
                                            len(train_data) // args.batch_size,
                                            args.cycles, args.min_lr, args.lr)
                        KD_optimizer.zero_grad()
                        KD_outputs, KD_hard_loss = utils.compute_loss(
                            student_KD,
                            x,
                            targets,
                            nn.MSELoss(),
                            compute_grad=True)
                        KD_optimizer.step()
                        avg_origin_loss += KD_hard_loss / batch_num
                        writer.add_scalar("student_KD_loss", KD_hard_loss,
                                          state["step"])

                    ### save wav ####
                    if example_num % args.example_freq == 0:
                        input_centre = torch.mean(
                            x[0, :, student_KD.shapes["output_start_frame"]:
                              student_KD.shapes["output_end_frame"]],
                            0)  # Stereo not supported for logs yet

                        # target=torch.mean(targets[0], 0).cpu().numpy()
                        # pred=torch.mean(KD_outputs[0], 0).detach().cpu().numpy()
                        # inputs=input_centre.cpu().numpy()

                        writer.add_audio("input:",
                                         input_centre,
                                         state["step"],
                                         sample_rate=args.sr)
                        writer.add_audio("pred:",
                                         torch.mean(KD_outputs[0], 0),
                                         state["step"],
                                         sample_rate=args.sr)
                        writer.add_audio("target",
                                         torch.mean(targets[0], 0),
                                         state["step"],
                                         sample_rate=args.sr)

                    state["step"] += 1
                    pbar.update(1)
            # VALIDATE
            val_loss, val_metrics = validate(args, student_KD, criterion,
                                             val_data)
            print("ori VALIDATION FINISHED: LOSS: " + str(val_loss))

            choose_val = val_metrics
            if args.teacher_model is not None:
                for i in range(len(nograd_alpha)):
                    writer.add_scalar("KD_rate_" + str(i), nograd_alpha[i],
                                      state["epochs"])
                print(f'all_avg_KD_rate = {all_avg_KD_rate}')
                writer.add_scalar("all_avg_KD_rate", all_avg_KD_rate,
                                  state["epochs"])
                # writer.add_scalar("val_loss_copy", val_loss_copy, state["epochs"])
                writer.add_scalar("total_avg_reward", total_avg_reward,
                                  state["epochs"])
                writer.add_scalar("total_avg_scalar_reward",
                                  total_avg_scalar_reward, state["epochs"])

                RL_checkpoint_path = os.path.join(
                    checkpoint_dir, "RL_checkpoint_" + str(state["epochs"]))
                utils.save_model(policy_network, PG_optimizer, state,
                                 RL_checkpoint_path)
                PG_lr_scheduler.step()

            writer.add_scalar("same", same, state["epochs"])
            writer.add_scalar("avg_origin_loss", avg_origin_loss,
                              state["epochs"])
            writer.add_scalar("val_enhance_pesq", choose_val[0],
                              state["epochs"])
            writer.add_scalar("val_improve_pesq", choose_val[1],
                              state["epochs"])
            writer.add_scalar("val_enhance_stoi", choose_val[2],
                              state["epochs"])
            writer.add_scalar("val_improve_stoi", choose_val[3],
                              state["epochs"])
            writer.add_scalar("val_enhance_SISDR", choose_val[4],
                              state["epochs"])
            writer.add_scalar("val_improve_SISDR", choose_val[5],
                              state["epochs"])
            # writer.add_scalar("val_COPY_pesq",val_metrics_copy[0], state["epochs"])
            writer.add_scalar("val_loss", val_loss, state["epochs"])

            # Set up training state dict that will also be saved into checkpoints
            checkpoint_path = os.path.join(
                checkpoint_dir, "checkpoint_" + str(state["epochs"]))
            if choose_val[0] < state["best_pesq"]:
                state["worse_epochs"] += 1
            else:
                print("MODEL IMPROVED ON VALIDATION SET!")
                state["worse_epochs"] = 0
                state["best_pesq"] = choose_val[0]
                state["best_checkpoint"] = checkpoint_path

            # CHECKPOINT
            print("Saving model...")
            utils.save_model(student_KD, KD_optimizer, state, checkpoint_path)
            print('dump alpha_memory')
            with open(os.path.join(log_dir, 'alpha_' + str(state["epochs"])),
                      "wb") as fp:  #Pickling
                pickle.dump(memory_alpha, fp)

            state["epochs"] += 1
        writer.close()
        info = args.model_name
        path = os.path.join(result_dir, info)
    else:
        PATH = args.load_model.split("/")
        info = PATH[-3] + "_" + PATH[-1]
        if args.outside_test:
            info += "_outside_test"
        print(info)
        path = os.path.join(result_dir, info)

    # test_data = SeparationDataset(dataset, "test", args.sr, args.channels, student_KD.shapes, False, args.hdf_dir, audio_transform=crop_func)

    #### TESTING ####
    # Test loss
    print("TESTING")
    # eval metrics
    #ling_data=get_ling_data_list('/media/hd03/sutsaiwei_data/data/mydata/ling_data')
    #validate(args, student_KD, criterion, test_data)
    #test_metrics = ling_evaluate(args, ling_data['noisy'], student_KD)
    #test_metrics = evaluate_without_noisy(args, dataset["test"], student_KD)
    test_metrics = evaluate(args, dataset["test"], student_KD)
    test_pesq = test_metrics['pesq']
    test_stoi = test_metrics['stoi']
    test_SISDR = test_metrics['SISDR']
    test_noise = test_metrics['noise']

    if not os.path.exists(path):
        os.makedirs(path)
    utils.save_result(test_pesq, path, "pesq")
    utils.save_result(test_stoi, path, "stoi")
    utils.save_result(test_SISDR, path, "SISDR")
    utils.save_result(test_noise, path, "noise")
Code Example #11
import numpy as np
import os
import torch
import _pickle as cPickle
from RL import RL
import time

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
localtime = time.asctime(time.localtime(time.time()))
os.system('clear')
data = []

rl = RL(Network='NN_2',
        system_size=3,
        p_error=0.1,
        capacity=200,
        dropout=0.0,
        learning_rate=0.00025,
        discount_factor=0.95)

rl.train_for_n_epochs(training_steps=100,
                      evaluation_steps=100,
                      prediction_steps=10,
                      epochs=10,
                      clip_error_term=5,
                      target_update=10,
                      reward_definition=0,
                      optimizer='Adam',
                      save_model_each_epoch=True,
                      data=data,
                      localtime=localtime)
Code Example #12
from RL import RL
from State import State
from Policy import Policy
from pathlib import Path
import numpy

print("___________")
print("TIC TAC TOE")
print("___________\n")
print("Learning...")

trainEpisodes = 100000
AI = RL(0.05)

Qfile = Path("Qvals.npy")
if Qfile.is_file():
    print("Loaded Q File")
    AI.policy.Q = numpy.load("Qvals.npy")
    AI.QLearning(0.95,0.9,0.1,trainEpisodes)

else:
    print("Starting New Training")
    AI.QLearning(0.95,0.9,0.1,trainEpisodes)
numpy.save("Qvals.npy", AI.policy.Q)

'''
Game
'''

while True:
    val = input("\nEnter 1 to go first, enter otherwise to go second: ")
Code Example #13
import json
import os
import sys
from random import randint

import numpy as np
import pandas as pd

from Utils import Utils
from RL import RL

if len(sys.argv) > 2:  # expect a season name and a number of rounds on the command line
    rounds = int(sys.argv[2])
    season = sys.argv[1].lower()

    model_file = season + "_Q.txt"

    utils = Utils()
    rl = RL()

    items_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), '..', 'shopping/items.csv'))

    items = pd.read_csv(items_path)

    Q_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), '..',
                     'shopping/models/' + model_file))
    Q = utils.load_Q(Q_path)
    Q = np.asmatrix(Q)

    steps = rl.run(rounds, Q)

    purchased_items = []
Code Example #14
import numpy as np
import pandas as pd
import json

from Utils import Utils
from RL import RL

utils = Utils()
rl = RL()

items = pd.read_csv("./items.csv")
print(items)
Q = utils.load_Q("./models/spring_Q.txt")
Q = np.asmatrix(Q)
steps = rl.run(7, Q)

purchased_items = {"items": []}

for step in steps:
    purchased_items['items'].append(items.loc[step])

df = pd.DataFrame(purchased_items)

df.to_json("response2.json")