Ejemplo n.º 1
0
    def work(self, COORD, render=False):
        """Main function of the Workers. This runs the environment and the experience
        is used to update the main Actor Critic Network.

        Args:
            COORD: Coordinator used to signal workers to stop (tf.train.Coordinator).
            render: If True, render the environment after every step.
        """
        # Run episodes until the coordinator stops us or the shared episode
        # counter reaches the configured maximum.
        while not COORD.should_stop() and self.sess.run(
                self.global_step) < self.settings["MaxEpisodes"]:

            # Advance the shared episode counter for this episode.
            self.sess.run(self.global_step_next)

            # Read the counter once and derive both periodic flags from the
            # same value, so logging and saving cannot disagree about the
            # current episode (the original read it twice back-to-back).
            episode = self.sess.run(self.global_step)
            logging = interval_flag(episode, self.settings["LogFreq"], 'log')
            saving = interval_flag(episode, self.settings["SaveFreq"], 'save')

            s0 = self.env.reset()

            for j in range(self.settings["MaxEpisodeSteps"] + 1):

                a, networkData = self.net.GetAction(state=s0)

                s1, r, done, _ = self.env.step(a)
                if render:
                    self.env.render()

                self.net.AddToTrajectory([s0, a, r, s1, done] + networkData)

                s0 = s1

                if done or j == self.settings[
                        "MaxEpisodeSteps"]:  # update global and assign to local net
                    self.net.Update(self.settings["NetworkHPs"],
                                    self.sess.run(self.global_step))
                    break

            self.progbar.update(self.sess.run(self.global_step))

            if saving:
                self.saver.save(self.sess,
                                self.MODEL_PATH + '/ctf_policy.ckpt',
                                global_step=self.sess.run(self.global_step))

            if logging:
                loggingDict = self.env.getLogging()
                # Renamed from `dict`, which shadowed the Python builtin.
                net_stats = self.net.GetStatistics()
                loggingDict.update(net_stats)
                Record(loggingDict, self.writer,
                       self.sess.run(self.global_step))
Ejemplo n.º 2
0
    def work(self, COORD, render=False):
        """Drive update iterations for the shared Actor Critic network.

        Repeatedly performs one network update per pass until the
        coordinator requests a stop or the global step reaches
        ``MaxEpisodes``, periodically recording network statistics.
        """
        # Loop with explicit guard clauses instead of a compound condition.
        while True:
            if COORD.should_stop():
                break
            if self.sess.run(self.global_step) >= self.settings["MaxEpisodes"]:
                break

            # Decide whether this iteration should emit network statistics.
            should_log = interval_flag(self.sess.run(self.global_step),
                                       self.settings["LogFreq"], 'logNet')

            # Apply one update to the shared network.
            self.net.Update(self.settings["NetworkHPs"],
                            self.sess.run(self.global_step))

            if should_log:
                stats = self.net.GetStatistics()
                Record(stats, self.writer, self.sess.run(self.global_step))
Ejemplo n.º 3
0
    def work(self, COORD, render=False):
        """Main function of the Workers. This runs the environment and the experience
        is used to update the main Actor Critic Network.

        Args:
            COORD: Coordinator used to signal workers to stop.
            render: If True, render the environment after every step.
        """
        #Allowing access to the global variables.
        # Run episodes until the coordinator stops us or the shared episode
        # counter reaches the configured maximum.
        while not COORD.should_stop() and self.sess.run(
                self.global_step) < self.settings["MaxEpisodes"]:

            # Advance the shared episode counter for this episode.
            self.sess.run(self.global_step_next)

            logging = interval_flag(self.sess.run(self.global_step),
                                    self.settings["LogFreq"], 'log')
            saving = interval_flag(self.sess.run(self.global_step),
                                   self.settings["SaveFreq"], 'save')

            #Initializing environment and storage variables:
            s0 = self.env.reset()
            a_past = [0]
            r_i_past = [0.0]
            r_e_past = [0.0]

            for j in range(self.settings["MaxEpisodeSteps"] + 1):

                # Action selection is conditioned on the previous action and
                # previous intrinsic/extrinsic rewards (NGU/Agent57-style input).
                a, networkData = self.net.GetAction(state=s0,
                                                    episode=self.sess.run(
                                                        self.global_step),
                                                    step=j,
                                                    a_past=a_past,
                                                    r_i_past=r_i_past,
                                                    r_e_past=r_e_past)
                #networkData is expected to be [betaVal,betaOH]

                s1, r, done, _ = self.env.step(a)
                if render:
                    self.env.render()

                #Calculating Intrinsic Reward of the state:
                # betaVal (networkData[0]) scales the intrinsic bonus.
                r_intrinsic = self.net.GetIntrinsicReward(s0, s1)
                r_total = r + networkData[0] * r_intrinsic

                #Adding to the trajectory
                self.net.AddToTrajectory(
                    [s0, a, r_total, s1, done, a_past, r_i_past, r_e_past] +
                    networkData)

                #Updating the storage variables.
                s0 = s1
                # NOTE(review): a_past and r_e_past were initialized as lists
                # ([0] and [0.0]) but are reassigned raw values here, while
                # r_i_past keeps the list wrapping. Cannot tell from this file
                # whether GetAction/AddToTrajectory tolerate both forms —
                # confirm against the network implementation.
                a_past = a
                r_i_past = [r_intrinsic]
                r_e_past = r

                #Pushing entire trajectory to the buffer
                if done or j == self.settings["MaxEpisodeSteps"]:
                    self.net.PushToBuffer()
                    break

            self.progbar.update(self.sess.run(self.global_step))
            if logging:
                loggingDict = self.env.getLogging()
                Record(loggingDict, self.writer,
                       self.sess.run(self.global_step))
            if saving:
                self.saver.save(self.sess,
                                self.MODEL_PATH + '/ctf_policy.ckpt',
                                global_step=self.sess.run(self.global_step))
Ejemplo n.º 4
0
# Set up TensorBoard summary writing and checkpointing for this run.
writer = tf.summary.FileWriter(LOG_PATH,graph=sess.graph)
# NOTE(review): `net.getVars` is used without parentheses — presumably a
# property returning a list of variables; confirm against the network class.
saver = tf.train.Saver(max_to_keep=3, var_list=net.getVars+[global_step])
net.InitializeVariablesFromFile(saver,MODEL_PATH_)
InitializeVariables(sess) #Included to catch if there are any uninitalized variables.

progbar = tf.keras.utils.Progbar(None, unit_name='Training',stateful_metrics=["Reward"])

# Instantiate each configured logging hook by name.
loggingFunctions=[]
for loggingFunc in settings["LoggingFunctions"]:
    func = GetFunction(loggingFunc)
    loggingFunctions.append(func(env,net,IMAGE_PATH))

for i in range(settings["MAX_EP"]):

    # Advance the episode counter and compute periodic log/save flags.
    sess.run(global_step_next)
    logging = interval_flag(sess.run(global_step), settings["LogFreq"], 'log')
    saving = interval_flag(sess.run(global_step), settings["SaveFreq"], 'save')

    s0 = env.reset()

    for j in range(settings["MAX_EP_STEPS"]+1):
        updating = interval_flag(j, settings['UPDATE_GLOBAL_ITER'], 'update')

        # Hierarchical policy: the network picks a high-level action which is
        # mapped to a concrete environment action by UseSubpolicy.
        a_hier, networkData = net.GetAction(state=s0,episode=sess.run(global_step),step=j)
        a = UseSubpolicy(s0,a_hier)
        s1,r,done,_ = env.step(action=a)

        net.AddToTrajectory([s0,a_hier,r,s1,done]+networkData)

        s0 = s1
        # NOTE(review): this snippet is truncated — the body of the
        # `if updating:` branch is missing from the visible source.
        if updating:   # update global and assign to local net
Ejemplo n.º 5
0
InitializeVariables(
    sess)  #Included to catch if there are any uninitalized variables.

progbar = tf.keras.utils.Progbar(None,
                                 unit_name='Training',
                                 stateful_metrics=["Reward"])

# Optionally instantiate configured logging hooks by name.
if "LoggingFunctions" in settings:
    loggingFunctions = []
    for loggingFunc in settings["LoggingFunctions"]:
        func = GetFunction(loggingFunc)
        loggingFunctions.append(func(env, net, IMAGE_PATH))
for i in range(settings["MaxEpisodes"]):

    # Advance the episode counter and compute periodic log/save flags.
    # NOTE(review): `logging` and `saving` are never used in the visible
    # span — the snippet appears truncated below.
    sess.run(global_step_next)
    logging = interval_flag(sess.run(global_step), settings["LogFreq"], 'log')
    saving = interval_flag(sess.run(global_step), settings["SaveFreq"], 'save')

    # The network chooses the next task/configuration for the environment
    # (curriculum-style reset); semantics of next_task() are defined elsewhere.
    s0 = env.reset(next_config=net.next_task())

    for j in range(settings["MaxEpisodeSteps"] + 1):

        a, networkData = net.GetAction(state=s0,
                                       episode=sess.run(global_step),
                                       step=j)

        s1, r, done, info = env.step(action=a)
        net.AddToTrajectory([s0, a, r, s1, done] + networkData)
        if args.render:
            env.render()
        s0 = s1