class Agent():
    def __init__(self, FLAGS, istrain, game_env, model, seq1=[], seq2=[], ismeta=False):
        """ Get parameters from files """
        self.FLAGS = FLAGS
        self.istrain = istrain
        if ismeta:
            self.param = import_module('DQNalign.param.MAML')
        else:
            self.param = import_module('DQNalign.param.' + self.FLAGS.network_set)

        """ Exploration strategy """
        if self.istrain:
            self.l_seq = game_env.l_seq
            self.e = self.param.startE
            self.stepDrop = (self.param.startE - self.param.endE) / self.param.annealing_steps

        """ Define sequence alignment environment """
        if self.istrain:
            self.env = Pairwise(game_env, 0, Z=self.param.Z)
        else:
            if len(seq1) + len(seq2) > 0:
                self.env = Pairwise(game_env, 1, seq1, seq2, Z=self.param.Z)
            else:
                self.env = Pairwise(game_env, 0, Z=self.param.Z)

        """ Set the networks used for training or inference """
        if ismeta:
            self.mainQN = model.mainQN
            self.tempQN = model.targetQN
            self.trainables = model.trainables
            self.copyOps = model.copyOps
        elif (self.FLAGS.model_name == "DQN") or (self.FLAGS.model_name == "SSD") or (
                self.FLAGS.model_name == "DiffSSD") or (self.FLAGS.model_name == "FFTDQN"):
            self.mainQN = model.mainQN
            self.targetQN = model.targetQN
            self.trainables = model.trainables
            self.targetOps = model.targetOps

        """ Initialize the variables """
        self.total_steps = 0
        self.start = time.time()
        self.myBuffer = experience_buffer()

    def reset(self):
        """ Reset the environment to the configured training sequence lengths """
        self.istrain = True
        self.env.sizeS1 = self.l_seq[0]
        self.env.sizeS2 = self.l_seq[1]

    def set(self, seq1=[], seq2=[]):
        """ Switch the environment to a given test sequence pair """
        self.istrain = False
        self.env.test(seq1, seq2)

    def train(self, sess):
        trainBatch = self.myBuffer.sample(self.param.batch_size)  # Select a batch from the experience buffer
        #print(np.shape(np.vstack(trainBatch[:, 3])))
        if (self.FLAGS.model_name == "DQN") or (self.FLAGS.model_name == "SSD") or (
                self.FLAGS.model_name == "DiffSSD") or (self.FLAGS.model_name == "FFTDQN"):
            # Q1: greedy actions chosen by the main network, Q2: Q values of the target network
            Q1 = sess.run(self.mainQN.predict,
                          feed_dict={self.mainQN.scalarInput: np.vstack(trainBatch[:, 3])})
            Q2 = sess.run(self.targetQN.Qout,
                          feed_dict={self.targetQN.scalarInput: np.vstack(trainBatch[:, 3])})
            # trainBatch[:, 4] marks whether the action was the last step of the episode.
            # If it was the last step, only the reward is used to update the Q value;
            # otherwise the double-DQN target is used:
            #   Qmain(s,a) = r(s,a) + y * Qtarget(s1, argmax_a Qmain(s1,a))
            end_multiplier = -(trainBatch[:, 4] - 1)
            doubleQ = Q2[range(self.param.batch_size), Q1]
            targetQ = trainBatch[:, 2] + (self.param.y * doubleQ * end_multiplier)
            _, loss = sess.run([self.mainQN.updateModel, self.mainQN.loss],
                               feed_dict={self.mainQN.scalarInput: np.vstack(trainBatch[:, 0]),
                                          self.mainQN.targetQ: targetQ,
                                          self.mainQN.actions: trainBatch[:, 1]})
            updateTarget(self.targetOps, sess)  # Update the target network with the 'tau' ratio
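
    # Worked toy example of the target computation in train() above (illustrative
    # values assumed here, not produced by this code). With batch_size = 2, y = 0.99,
    # rewards r = [1, -1] and done flags = [0, 1]:
    #   Q1 = argmax_a Qmain(s1, a)                 -> e.g. [2, 0]
    #   Q2 = Qtarget(s1, .)                        -> e.g. [[0.1, 0.3, 0.7], [0.5, 0.2, 0.4]]
    #   doubleQ = Q2[range(2), Q1]                 -> [0.7, 0.5]
    #   end_multiplier = -(done - 1)               -> [1, 0]  (terminal transitions keep only r)
    #   targetQ = r + y * doubleQ * end_multiplier -> [1 + 0.693, -1] = [1.693, -1.0]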
    def skip(self):
        # Count how far the agent can advance along the diagonal inside the current
        # window before the first mismatch, and return that many default (0) actions
        a = []
        seq1end = min(self.env.x + self.env.win_size - 1, self.env.sizeS1 - 1)
        seq2end = min(self.env.y + self.env.win_size - 1, self.env.sizeS2 - 1)
        minend = min(seq1end - self.env.x, seq2end - self.env.y)
        diff = np.where(self.env.seq1[self.env.x:self.env.x + minend + 1] !=
                        self.env.seq2[self.env.y:self.env.y + minend + 1])
        if np.size(diff) > 0:
            a = [0] * np.min(diff)
        else:
            a = [0] * minend
        return a

    def reverseskip(self):
        # Same as skip(), but moving backward (toward the start of both sequences)
        a = []
        seq1end = max(self.env.x - self.env.win_size + 1, 0)
        seq2end = max(self.env.y - self.env.win_size + 1, 0)
        minend = min(self.env.x - seq1end, self.env.y - seq2end)
        diff = np.where(self.env.seq1[self.env.x - minend:self.env.x + 1][::-1] !=
                        self.env.seq2[self.env.y - minend:self.env.y + 1][::-1])
        if np.size(diff) > 0:
            a = [0] * np.max(diff)
        else:
            a = [0] * minend
        return a

    def skipRC(self):
        # Forward skip against the reverse-complement strand (rev2)
        a = []
        seq1end = min(self.env.x + self.env.win_size - 1, self.env.sizeS1 - 1)
        seq2end = max(self.env.y - self.env.win_size + 1, 0)
        minend = min(seq1end - self.env.x, self.env.y - seq2end)
        diff = np.where(self.env.seq1[self.env.x:self.env.x + minend + 1] !=
                        self.env.rev2[self.env.y - minend:self.env.y + 1][::-1])
        if np.size(diff) > 0:
            a = [0] * np.min(diff)
        else:
            a = [0] * minend
        return a

    def reverseskipRC(self):
        # Backward skip against the reverse-complement strand (rev2)
        a = []
        seq1end = max(self.env.x - self.env.win_size + 1, 0)
        seq2end = min(self.env.y + self.env.win_size - 1, self.env.sizeS2 - 1)
        minend = min(self.env.x - seq1end, seq2end - self.env.y)
        diff = np.where(self.env.seq1[self.env.x - minend:self.env.x + 1][::-1] !=
                        self.env.rev2[self.env.y:self.env.y + minend + 1])
        if np.size(diff) > 0:
            a = [0] * np.max(diff)
        else:
            a = [0] * minend
        return a
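
    # Toy illustration of the skip heuristic above (values assumed for illustration,
    # not taken from this code). With a window size of 4 and
    #   seq1[x:x+4] = A C G T
    #   seq2[y:y+4] = A C A T
    # the first mismatch is at offset 2, so skip() returns [0, 0]: two action-0 steps
    # that advance along the diagonal without querying the network.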
    def metatrain(self, sess, mainBuffer=False, X=0):
        episodeBuffer = experience_buffer()

        if self.istrain:
            # Environment reset for each episode
            s1 = self.env.reset()  # Rendered image of the alignment environment
            s1 = processState(s1)
        else:
            s1 = processState(self.env.renderEnv())

        d = False  # The state of the game (end or not)
        j = 0
        rT1 = 0  # Total reward
        rT2 = 0  # Total match
        best = 0
        flag = False

        while j < self.env.sizeS1 + self.env.sizeS2:  # Proceed until the maximum episode length
            if self.env.seq1[self.env.x] == self.env.seq2[self.env.y]:
                a = self.skip()
            else:
                s1 = processState(self.env.renderEnv())
                a = sess.run(self.tempQN.predict, feed_dict={self.tempQN.scalarInput: [s1]})
            #print(self.env.x,self.env.y,a,self.env.seq1[self.env.x],self.env.seq2[self.env.y],j,rT1)

            # Update the DQN network
            # Apply the action(s): observe the next state, reward and done flag
            for _ in range(np.size(a)):
                s = s1
                s1, r, d = self.env.step(a[_])
                s1 = processState(s1)
                episodeBuffer.add(np.reshape(np.array([s, a[_], r, s1, d]), [1, 5]))  # Save the transition into the episode buffer
                rT1 += r
                rT2 += (r > 0)
                j += 1
                if rT1 >= best:
                    best = rT1
                # If the score drops more than X below the best, the extension is ended
                if (rT1 < best - X) and (X > 0):
                    flag = True
                    break
                if d == True:
                    break

            if (j % self.param.update_freq == 0) and (j >= self.param.batch_size) and self.istrain:
                #print(j, self.env.x, self.env.y)
                # Update the temp network
                trainBatch = episodeBuffer.sample(self.param.batch_size)  # Select a batch from the experience buffer
                # Q1: greedy actions chosen by the temp network, Q2: Q values of the temp network
                Q1 = sess.run(self.tempQN.predict,
                              feed_dict={self.tempQN.scalarInput: np.vstack(trainBatch[:, 3])})
                Q2 = sess.run(self.tempQN.Qout,
                              feed_dict={self.tempQN.scalarInput: np.vstack(trainBatch[:, 3])})
                # trainBatch[:, 4] marks whether the action was the last step of the episode.
                # If it was the last step, only the reward is used to update the Q value;
                # otherwise: Qmain(s,a) = r(s,a) + y * Qtarget(s1, argmax_a Qmain(s1,a))
                end_multiplier = -(trainBatch[:, 4] - 1)
                doubleQ = Q2[range(self.param.batch_size), Q1]
                targetQ = trainBatch[:, 2] + (self.param.y * doubleQ * end_multiplier)
                _ = sess.run(self.tempQN.updateModel,
                             feed_dict={self.tempQN.scalarInput: np.vstack(trainBatch[:, 0]),
                                        self.tempQN.targetQ: targetQ,
                                        self.tempQN.actions: trainBatch[:, 1]})

            if d == True:
                break
            if flag == True:
                break

        if self.istrain:
            # Environment reset for each episode
            s1 = self.env.reset()  # Rendered image of the alignment environment
            s1 = processState(s1)
            d = False  # The state of the game (end or not)
            j = 0
            rT1 = 0  # Total reward
            rT2 = 0  # Total match

            while j < self.env.sizeS1 + self.env.sizeS2:  # Proceed until the maximum episode length
                if self.env.seq1[self.env.x] == self.env.seq2[self.env.y]:
                    a = self.skip()
                else:
                    s1 = processState(self.env.renderEnv())
                    a = sess.run(self.tempQN.predict, feed_dict={self.tempQN.scalarInput: [s1]})

                # Update the DQN network
                # Apply the action(s): observe the next state, reward and done flag
                for _ in range(np.size(a)):
                    s = s1
                    s1, r, d = self.env.step(a[_])
                    s1 = processState(s1)
                    mainBuffer.add(np.reshape(np.array([s, a[_], r, s1, d]), [1, 5]))  # Save the transition into the main buffer
                    rT1 += r
                    rT2 += (r > 0)
                    j += 1
                    if d == True:
                        break
                if d == True:
                    break

        return rT1, rT2, j, mainBuffer

    def metatrain2(self, sess, mainBuffer=False, X=0):
        episodeBuffer = experience_buffer()

        if self.istrain:
            # Environment reset for each episode
            s1 = self.env.reset(2)  # Rendered image of the alignment environment
            s1 = processState(s1)
        else:
            s1 = processState(self.env.renderDiff())

        d = False  # The state of the game (end or not)
        j = 0
        rT1 = 0  # Total reward
        rT2 = 0  # Total match
        best = 0
        flag = False

        while j < self.env.sizeS1 + self.env.sizeS2:  # Proceed until the maximum episode length
            if self.env.seq1[self.env.x] == self.env.seq2[self.env.y]:
                a = self.skip()
            else:
                s1 = processState(self.env.renderDiff())
                a = sess.run(self.tempQN.predict, feed_dict={self.tempQN.scalarInput: [s1]})
            #print(self.env.x,self.env.y,a,self.env.seq1[self.env.x],self.env.seq2[self.env.y],j,rT1)

            # Update the DQN network
            # Apply the action(s): observe the next state, reward and done flag
            for _ in range(np.size(a)):
                s = s1
                s1, r, d = self.env.stepDiff(a[_])
                s1 = processState(s1)
                episodeBuffer.add(np.reshape(np.array([s, a[_], r, s1, d]), [1, 5]))  # Save the transition into the episode buffer
                rT1 += r
                rT2 += (r > 0)
                j += 1
                if rT1 >= best:
                    best = rT1
                # If the score drops more than X below the best, the extension is ended
                if (rT1 < best - X) and (X > 0):
                    flag = True
                    break
                if d == True:
                    break

            if (j % self.param.update_freq == 0) and (j >= self.param.batch_size) and self.istrain:
                #print(j, self.env.x, self.env.y)
                # Update the temp network
                trainBatch = episodeBuffer.sample(self.param.batch_size)  # Select a batch from the experience buffer
                # Q1: greedy actions chosen by the temp network, Q2: Q values of the temp network
                Q1 = sess.run(self.tempQN.predict,
                              feed_dict={self.tempQN.scalarInput: np.vstack(trainBatch[:, 3])})
                Q2 = sess.run(self.tempQN.Qout,
                              feed_dict={self.tempQN.scalarInput: np.vstack(trainBatch[:, 3])})
                # trainBatch[:, 4] marks whether the action was the last step of the episode.
                # If it was the last step, only the reward is used to update the Q value;
                # otherwise: Qmain(s,a) = r(s,a) + y * Qtarget(s1, argmax_a Qmain(s1,a))
                end_multiplier = -(trainBatch[:, 4] - 1)
                doubleQ = Q2[range(self.param.batch_size), Q1]
                targetQ = trainBatch[:, 2] + (self.param.y * doubleQ * end_multiplier)
                _ = sess.run(self.tempQN.updateModel,
                             feed_dict={self.tempQN.scalarInput: np.vstack(trainBatch[:, 0]),
                                        self.tempQN.targetQ: targetQ,
                                        self.tempQN.actions: trainBatch[:, 1]})

            if d == True:
                break
            if flag == True:
                break

        if self.istrain:
            # Environment reset for each episode
            s1 = self.env.reset(2)  # Rendered image of the alignment environment
            s1 = processState(s1)
            d = False  # The state of the game (end or not)
            j = 0
            rT1 = 0  # Total reward
            rT2 = 0  # Total match

            while j < self.env.sizeS1 + self.env.sizeS2:  # Proceed until the maximum episode length
                if self.env.seq1[self.env.x] == self.env.seq2[self.env.y]:
                    a = self.skip()
                else:
                    s1 = processState(self.env.renderDiff())
                    a = sess.run(self.tempQN.predict, feed_dict={self.tempQN.scalarInput: [s1]})

                # Update the DQN network
                # Apply the action(s): observe the next state, reward and done flag
                for _ in range(np.size(a)):
                    s = s1
                    s1, r, d = self.env.stepDiff(a[_])
                    s1 = processState(s1)
                    mainBuffer.add(np.reshape(np.array([s, a[_], r, s1, d]), [1, 5]))  # Save the transition into the main buffer
                    rT1 += r
                    rT2 += (r > 0)
                    j += 1
                    if d == True:
                        break
                if d == True:
                    break

        return rT1, rT2, j, mainBuffer

    def Global(self, sess, record=0):
        # Newly define experience buffer for the new episode
        past = time.time()
        if self.FLAGS.show_align:
            dot_plot = 255 * np.ones((self.env.sizeS1, self.env.sizeS2))
        if self.FLAGS.print_align:
            Nucleotide = ["N", "A", "C", "G", "T"]

        if self.istrain:
            episodeBuffer = experience_buffer()
            # Environment reset for each episode
            s1 = self.env.reset()  # Rendered image of the alignment environment
            s1 = processState(s1)  # Resize to 1-dimensional vector
        else:
            s = processState(self.env.renderEnv())

        d = False  # The state of the game (end or not)
        rT1 = 0  # Total reward
        rT2 = 0  # Total match
        j = 0

        while j < self.env.sizeS1 + self.env.sizeS2:  # Proceed until the maximum episode length
            #print(self.env.x, self.env.y)
            if self.FLAGS.display_process:
                if j % 1000 == 0:
                    now = time.time()

            # Exploration step
            if self.env.seq1[self.env.x] == self.env.seq2[self.env.y]:
                a = self.skip()
            elif self.istrain:
                if self.FLAGS.exploration == "e-greedy":
                    if (np.random.rand(1) < self.e or self.total_steps < self.param.pre_train_steps):
                        a = [np.random.randint(0, self.param.n_action)]
                    else:
                        s1 = processState(self.env.renderEnv())
                        a = sess.run(self.mainQN.predict, feed_dict={self.mainQN.scalarInput: [s1]})
                elif self.FLAGS.exploration == "boltzmann":
                    temp = self.e
                    s1 = processState(self.env.renderEnv())
                    Qprobs = sess.run(self.mainQN.Qdist,
                                      feed_dict={self.mainQN.scalarInput: [s1],
                                                 self.mainQN.Temp: [temp]})
                    action_value = np.random.choice(Qprobs[0], p=Qprobs[0])
                    a = [np.argmax(Qprobs[0] == action_value)]
                elif self.FLAGS.exploration == "bayesian":
                    keep = 1 - self.e
                    temp = self.e
                    s1 = processState(self.env.renderEnv())
                    Qprobs = sess.run(self.mainQN.Qdist,
                                      feed_dict={self.mainQN.scalarInput: [s1],
                                                 self.mainQN.Temp: [temp],
                                                 self.mainQN.keep_per: [keep]})
                    action_value = np.random.choice(Qprobs[0], p=Qprobs[0])
                    a = [np.argmax(Qprobs[0] == action_value)]
            else:
                #test = time.time()
                s1 = processState(self.env.renderEnv())
                #print("Rendering stage :",time.time()-test)
                #test = time.time()
                a = sess.run(self.mainQN.predict, feed_dict={self.mainQN.scalarInput: [s1]})
                #print("Prediction stage :",time.time()-test)
                #test = time.time()

            # Update the DQN network
            if self.istrain:
                # Apply the action: observe the next state, reward and done flag
                s = s1
                s1, r, d = self.env.step(a[0])
                j += 1
                s1 = processState(s1)
                self.total_steps += 1
                rT1 += r
                rT2 += (r > 0)
                episodeBuffer.add(np.reshape(np.array([s, a[0], r, s1, d]), [1, 5]))  # Save the transition into the episode buffer

                if self.total_steps > self.param.pre_train_steps:
                    # Refresh the exploration probability (epsilon-greedy)
                    if self.e > self.param.endE:
                        self.e -= self.stepDrop
                    # For every update_freq steps, update the main network
                    if self.total_steps % (self.param.update_freq) == 0:
                        self.train(sess)
                        #print("Training stage :",time.time()-test)
            else:
                for _ in range(np.size(a)):
                    if self.FLAGS.show_align:
                        dot_plot[self.env.x][self.env.y] = 0
                    if self.FLAGS.print_align:
                        record.record(self.env.x, self.env.y, a[_],
                                      Nucleotide[self.env.seq1[self.env.x] + 1],
                                      Nucleotide[self.env.seq2[self.env.y] + 1])
                    r, d = self.env.teststep(a[_])
                    j += 1
                    rT1 += r
                    rT2 += (r > 0)
                    if d == True:
                        break
                #print("Do step stage :",time.time()-test)

            if d == True:
                break
            if self.FLAGS.display_process:
                if j % 1000 == 1000 - 1:
                    print("Align step is processed :", j + 1, "with", time.time() - now)

        # Add the results of the episode into the total results
        if self.istrain:
            self.myBuffer.add(episodeBuffer.buffer)

        now = time.time()
        if self.FLAGS.show_align and self.FLAGS.print_align:
            return rT1, rT2, now - past, j, dot_plot
        elif self.FLAGS.show_align:
            return rT1, rT2, now - past, j, dot_plot
        elif self.FLAGS.print_align:
            return rT1, rT2, now - past, j
        return rT1, rT2, now - past, j

    def DiffGlobal(self, sess, record=0):
        # Newly define experience buffer for the new episode
        past = time.time()
        if self.FLAGS.show_align:
            dot_plot = 255 * np.ones((self.env.sizeS1, self.env.sizeS2))
        if self.FLAGS.print_align:
            Nucleotide = ["N", "A", "C", "G", "T"]

        if self.istrain:
            episodeBuffer = experience_buffer()
            # Environment reset for each episode
            s1 = self.env.reset(2)  # Rendered image of the alignment environment
            s1 = processState(s1)  # Resize to 1-dimensional vector
        else:
            s = processState(self.env.renderDiff())

        d = False  # The state of the game (end or not)
        rT1 = 0  # Total reward
        rT2 = 0  # Total match
        j = 0

        while j < self.env.sizeS1 + self.env.sizeS2:  # Proceed until the maximum episode length
            if self.FLAGS.display_process:
                if j % 1000 == 0:
                    now = time.time()

            # Exploration step
            if self.istrain and (np.random.rand(1) < self.e
                                 or self.total_steps < self.param.pre_train_steps):
                a = [np.random.randint(0, self.param.n_action)]
            elif self.env.seq1[self.env.x] == self.env.seq2[self.env.y]:
                a = self.skip()
            else:
                #test = time.time()
                s1 = processState(self.env.renderDiff())
                #print("Rendering stage :",time.time()-test)
                #test = time.time()
                a = sess.run(self.mainQN.predict, feed_dict={self.mainQN.scalarInput: [s1]})
                #print("Prediction stage :",time.time()-test)
                #test = time.time()

            # Update the DQN network
            if self.istrain:
                # Apply the action: observe the next state, reward and done flag
                s = s1
                s1, r, d = self.env.stepDiff(a[0])
                j += 1
                s1 = processState(s1)
                self.total_steps += 1
                rT1 += r
                rT2 += (r > 0)
                episodeBuffer.add(np.reshape(np.array([s, a[0], r, s1, d]), [1, 5]))  # Save the transition into the episode buffer

                if self.total_steps > self.param.pre_train_steps:
                    # Refresh the exploration probability (epsilon-greedy)
                    if self.e > self.param.endE:
                        self.e -= self.stepDrop
                    # For every update_freq steps, update the main network
                    if self.total_steps % (self.param.update_freq) == 0:
                        self.train(sess)
                        #print("Training stage :",time.time()-test)
            else:
                for _ in range(np.size(a)):
                    if self.FLAGS.show_align:
                        dot_plot[self.env.x][self.env.y] = 0
                    if self.FLAGS.print_align:
                        record.record(self.env.x, self.env.y, a[_],
                                      Nucleotide[self.env.seq1[self.env.x] + 1],
                                      Nucleotide[self.env.seq2[self.env.y] + 1])
                    r, d = self.env.teststep(a[_])
                    j += 1
                    rT1 += r
                    rT2 += (r > 0)
                    if d == True:
                        break
                #print("Do step stage :",time.time()-test)

            if d == True:
                break
            if self.FLAGS.display_process:
                if j % 1000 == 1000 - 1:
                    print("Align step is processed :", j + 1, "with", time.time() - now)

        # Add the results of the episode into the total results
        if self.istrain:
            self.myBuffer.add(episodeBuffer.buffer)

        now = time.time()
        if self.FLAGS.show_align and self.FLAGS.print_align:
            return rT1, rT2, now - past, j, dot_plot
        elif self.FLAGS.show_align:
            return rT1, rT2, now - past, j, dot_plot
        elif self.FLAGS.print_align:
            return rT1, rT2, now - past, j
        return rT1, rT2, now - past, j
    def FFTGlobal(self, sess, record=0):
        # Newly define experience buffer for the new episode
        past = time.time()
        if self.FLAGS.show_align:
            dot_plot = 255 * np.ones((self.env.sizeS1, self.env.sizeS2))
        if self.FLAGS.print_align:
            Nucleotide = ["N", "A", "C", "G", "T"]

        if self.istrain:
            episodeBuffer = experience_buffer()
            # Environment reset for each episode
            s1 = self.env.reset(3)  # Rendered image of the alignment environment
            s1 = processState(s1)  # Resize to 1-dimensional vector
        else:
            s = processState(self.env.renderFFT())

        d = False  # The state of the game (end or not)
        rT1 = 0  # Total reward
        rT2 = 0  # Total match
        j = 0

        while j < self.env.sizeS1 + self.env.sizeS2:  # Proceed until the maximum episode length
            if self.FLAGS.display_process:
                if j % 1000 == 0:
                    now = time.time()

            # Exploration step
            if self.istrain and (np.random.rand(1) < self.e
                                 or self.total_steps < self.param.pre_train_steps):
                a = [np.random.randint(0, self.param.n_action)]
            elif self.env.seq1[self.env.x] == self.env.seq2[self.env.y]:
                a = self.skip()
            else:
                #test = time.time()
                s1 = processState(self.env.renderFFT())
                #print("Rendering stage :",time.time()-test)
                #test = time.time()
                a = sess.run(self.mainQN.predict, feed_dict={self.mainQN.scalarInput: [s1]})
                #print("Prediction stage :",time.time()-test)
                #test = time.time()

            # Update the DQN network
            if self.istrain:
                # Apply the action: observe the next state, reward and done flag
                s = s1
                s1, r, d = self.env.stepFFT(a[0])
                j += 1
                s1 = processState(s1)
                self.total_steps += 1
                rT1 += r
                rT2 += (r > 0)
                episodeBuffer.add(np.reshape(np.array([s, a[0], r, s1, d]), [1, 5]))  # Save the transition into the episode buffer

                if self.total_steps > self.param.pre_train_steps:
                    # Refresh the exploration probability (epsilon-greedy)
                    if self.e > self.param.endE:
                        self.e -= self.stepDrop
                    # For every update_freq steps, update the main network
                    if self.total_steps % (self.param.update_freq) == 0:
                        self.train(sess)
                        #print("Training stage :",time.time()-test)
            else:
                for _ in range(np.size(a)):
                    if self.FLAGS.show_align:
                        dot_plot[self.env.x][self.env.y] = 0
                    if self.FLAGS.print_align:
                        record.record(self.env.x, self.env.y, a[_],
                                      Nucleotide[self.env.seq1[self.env.x] + 1],
                                      Nucleotide[self.env.seq2[self.env.y] + 1])
                    r, d = self.env.teststep(a[_])
                    j += 1
                    rT1 += r
                    rT2 += (r > 0)
                    if d == True:
                        break
                #print("Do step stage :",time.time()-test)

            if d == True:
                break
            if self.FLAGS.display_process:
                if j % 1000 == 1000 - 1:
                    print("Align step is processed :", j + 1, "with", time.time() - now)

        # Add the results of the episode into the total results
        if self.istrain:
            self.myBuffer.add(episodeBuffer.buffer)

        now = time.time()
        if self.FLAGS.show_align and self.FLAGS.print_align:
            return rT1, rT2, now - past, j, dot_plot
        elif self.FLAGS.show_align:
            return rT1, rT2, now - past, j, dot_plot
        elif self.FLAGS.print_align:
            return rT1, rT2, now - past, j
        return rT1, rT2, now - past, j

    def Local(self, sess, uX1, uX2, uY1, uY2, X):
        # TODO: reverse-complement handling still needs to be added
        # Newly define experience buffer for the new episode
        if uY1 < uY2:
            RCmode = 0
        else:
            RCmode = 1

        past = time.time()
        rT1o = 0
        rT2o = 0
        pathx = []
        pathy = []
        d = False  # The state of the game (end or not)
        rT1 = 1  # Total reward
        rT2 = 1  # Total match
        j = 0

        if RCmode == 0:
            best = 1
            best2 = 1
            flag = 0
            pathx1 = []
            pathy1 = []
            # Forward extension
            if (uX2 + 1 <= self.env.sizeS1) and (uY2 + 1 <= self.env.sizeS2):
                self.env.x = uX2 + 1
                self.env.y = uY2 + 1
                pathx1.append(self.env.x)
                pathy1.append(self.env.y)
                bestxy = [self.env.x, self.env.y]
                while j < self.env.sizeS1 + self.env.sizeS2 - uX2 - uY2:
                    # Skip process
                    if self.env.seq1[self.env.x] == self.env.seq2[self.env.y]:
                        a = self.skip()
                    else:
                        #test = time.time()
                        s1 = processState(self.env.renderEnv())
                        #print("Rendering stage :",time.time()-test)
                        #test = time.time()
                        a = sess.run(self.mainQN.predict, feed_dict={self.mainQN.scalarInput: [s1]})
                        #print("Prediction stage :",time.time()-test)
                        #test = time.time()

                    for _ in range(np.size(a)):
                        r, d = self.env.teststep(a[_])
                        pathx1.append(self.env.x)
                        pathy1.append(self.env.y)
                        j += 1
                        rT1 += r
                        rT2 += (r > 0)
                        if rT1 >= best:
                            best = rT1
                            best2 = rT2
                            bestxy = [self.env.x, self.env.y]
                        # If the score drops more than X below the best, the extension is ended
                        if rT1 < best - X:
                            flag = 1
                            break
                        if d == True:
                            flag = 1
                            break
                    #print("Do step stage :",time.time()-test)

                    if flag:
                        break

                bestp = function.check_where(pathx1, pathy1, bestxy)
                pathx1 = pathx1[:bestp + 1]
                pathy1 = pathy1[:bestp + 1]

            rT1o += best
            rT2o += best2

            d = False  # The state of the game (end or not)
            rT1 = 1  # Total reward
            rT2 = 1  # Total match
            j = 0
            best = 1
            best2 = 1
            flag = 0
            pathx2 = []
            pathy2 = []
            # Reverse extension
            if (uX1 - 1 >= 0) and (uY1 - 1 >= 0):
                self.env.x = uX1 - 1
                self.env.y = uY1 - 1
                pathx2.append(self.env.x)
                pathy2.append(self.env.y)
                bestxy = [self.env.x, self.env.y]
                while j < uX1 + uY1:
                    # Skip process
                    if self.env.seq1[self.env.x] == self.env.seq2[self.env.y]:
                        a = self.reverseskip()
                    else:
                        #test = time.time()
                        s1 = processState(self.env.renderRev())
                        #print("Rendering stage :",time.time()-test)
                        #test = time.time()
                        a = sess.run(self.mainQN.predict, feed_dict={self.mainQN.scalarInput: [s1]})
                        #print("Prediction stage :",time.time()-test)
                        #test = time.time()

                    for _ in range(np.size(a)):
                        r, d = self.env.teststep(10 + a[_])
                        pathx2.append(self.env.x)
                        pathy2.append(self.env.y)
                        j += 1
                        rT1 += r
                        rT2 += (r > 0)
                        if rT1 >= best:
                            best = rT1
                            best2 = rT2
                            bestxy = [self.env.x, self.env.y]
                        # If the score drops more than X below the best, the extension is ended
                        if rT1 < best - X:
                            flag = 1
                            break
                        if d == True:
                            flag = 1
                            break
                    #print("Do step stage :",time.time()-test)

                    if flag:
                        break

                bestp = function.check_where(pathx2, pathy2, bestxy)
                pathx2 = pathx2[:bestp + 1]
                pathy2 = pathy2[:bestp + 1]

            pathx = pathx2[::-1] + list(range(uX1, uX2 + 1)) + pathx1
            pathy = pathy2[::-1] + list(range(uY1, uY2 + 1)) + pathy1
            rT1o += best
            rT2o += best2
            same = np.sum(np.array(self.env.seq1[list(range(uX1, uX2 + 1))]) ==
                          np.array(self.env.seq2[list(range(uY1, uY2 + 1))]))
            length = uX2 - uX1 + 1
            rT1o += self.env.reward[0] * same + self.env.reward[1] * (length - same)
            rT2o += same
            path = [pathx, pathy]
        else:
            best = 1
            best2 = 1
            flag = 0
            pathx1 = []
            pathy1 = []
            # Forward extension (reverse-complement orientation)
            if (uX2 + 1 <= self.env.sizeS1) and (uY2 - 1 >= 0):
                self.env.x = uX2 + 1
                self.env.y = uY1 - 1
                pathx1.append(self.env.x)
                pathy1.append(self.env.y)
                bestxy = [self.env.x, self.env.y]
                while j < self.env.sizeS1 - uX2 + uY2:
                    # Skip process
                    if self.env.seq1[self.env.x] == self.env.rev2[self.env.y]:
                        a = self.skipRC()
                    else:
                        #test = time.time()
                        s1 = processState(self.env.renderRC())
                        #print("Rendering stage :",time.time()-test)
                        #test = time.time()
                        a = sess.run(self.mainQN.predict, feed_dict={self.mainQN.scalarInput: [s1]})
                        #print("Prediction stage :",time.time()-test)
                        #test = time.time()

                    for _ in range(np.size(a)):
                        r, d = self.env.stepRC(a[_])
                        pathx1.append(self.env.x)
                        pathy1.append(self.env.y)
                        j += 1
                        rT1 += r
                        rT2 += (r > 0)
                        if rT1 >= best:
                            best = rT1
                            best2 = rT2
                            bestxy = [self.env.x, self.env.y]
                        # If the score drops more than X below the best, the extension is ended
                        if rT1 < best - X:
                            flag = 1
                            break
                        if d == True:
                            flag = 1
                            break
                    #print("Do step stage :",time.time()-test)

                    if flag:
                        break
                bestp = function.check_where(pathx1, pathy1, bestxy)
                pathx1 = pathx1[:bestp + 1]
                pathy1 = pathy1[:bestp + 1]

            rT1o += best
            rT2o += best2

            d = False  # The state of the game (end or not)
            rT1 = 1  # Total reward
            rT2 = 1  # Total match
            j = 0
            best = 1
            best2 = 1
            flag = 0
            pathx2 = []
            pathy2 = []
            # Reverse extension (reverse-complement orientation)
            if (uX1 - 1 >= 0) and (uY1 + 1 <= self.env.sizeS2):
                self.env.x = uX1 - 1
                self.env.y = uY1 + 1
                pathx2.append(self.env.x)
                pathy2.append(self.env.y)
                bestxy = [self.env.x, self.env.y]
                while j < uX1 + self.env.sizeS2 - uY1:
                    # Skip process
                    if self.env.seq1[self.env.x] == self.env.rev2[self.env.y]:
                        a = self.reverseskipRC()
                    else:
                        #test = time.time()
                        s1 = processState(self.env.renderRCRev())
                        #print("Rendering stage :",time.time()-test)
                        #test = time.time()
                        a = sess.run(self.mainQN.predict, feed_dict={self.mainQN.scalarInput: [s1]})
                        #print("Prediction stage :",time.time()-test)
                        #test = time.time()

                    for _ in range(np.size(a)):
                        r, d = self.env.stepRC(10 + a[_])
                        pathx2.append(self.env.x)
                        pathy2.append(self.env.y)
                        j += 1
                        rT1 += r
                        rT2 += (r > 0)
                        if rT1 >= best:
                            best = rT1
                            best2 = rT2
                            bestxy = [self.env.x, self.env.y]
                        # If the score drops more than X below the best, the extension is ended
                        if rT1 < best - X:
                            flag = 1
                            break
                        if d == True:
                            flag = 1
                            break
                    #print("Do step stage :",time.time()-test)

                    if flag:
                        break

                bestp = function.check_where(pathx2, pathy2, bestxy)
                pathx2 = pathx2[:bestp + 1]
                pathy2 = pathy2[:bestp + 1]

            pathx = pathx2[::-1] + list(range(uX1, uX2 + 1)) + pathx1
            pathy = pathy2[::-1] + list(range(uY1, uY2 - 1, -1)) + pathy1
            rT1o += best
            rT2o += best2
            same = np.sum(np.array(self.env.seq1[list(range(uX1, uX2 + 1))]) ==
                          np.array(self.env.rev2[list(range(uY1, uY2 - 1, -1))]))
            length = uX2 - uX1 + 1
            rT1o += self.env.reward[0] * same + self.env.reward[1] * (length - same)
            rT2o += same
            path = [pathx, pathy]

        now = time.time()
        return rT1o, rT2o, now - past, j, path
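

# A minimal usage sketch (hypothetical, for illustration only): how an Agent is
# typically driven for one training episode. `FLAGS`, `train_env`, and `model` are
# assumed to be built elsewhere (e.g. by the training scripts of this repository);
# the names below are placeholders, not part of the class above.
#
#   agent = Agent(FLAGS, istrain=True, game_env=train_env, model=model)
#   with tf.Session() as sess:
#       sess.run(tf.global_variables_initializer())
#       rT1, rT2, elapsed, steps = agent.Global(sess)   # one alignment episode
#       print("reward %d, matches %d, %d steps in %.1fs" % (rT1, rT2, steps, elapsed))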
# Standalone profiling / timeline-debugging snippet: it assumes that `sess`, `saver`,
# `run_options`, `run_metadata`, `FLAGS`, `train_env`, and `train_model` have already
# been created by the surrounding training script.
env = Pairwise(train_env, 0, Z=train_model.param.Z)
mainQN = train_model.mainQN
targetQN = train_model.targetQN
trainables = train_model.trainables
targetOps = train_model.targetOps

""" Initialize the variables """
total_steps = 0
start = time.time()
myBuffer = experience_buffer()

print('Loading Model...')
ckpt = tf.train.get_checkpoint_state(train_env.path)
saver.restore(sess, ckpt.model_checkpoint_path)

s = env.reset()  # Rendered image of the alignment environment
s = processState(s)  # Resize to 1-dimensional vector
a = sess.run(mainQN.predict,
             feed_dict={mainQN.scalarInput: [s]},
             options=run_options,
             run_metadata=run_metadata)

writer = tf.summary.FileWriter(logdir='tensorboard/graph_' + str(FLAGS.model_name) + '_' +
                               str(train_env.win_size),
                               graph=sess.graph)
print(a)

tl = timeline.Timeline(run_metadata.step_stats)
ctf = tl.generate_chrome_trace_format()
with open('tensorboard/timelineOfBug.json', 'w') as f:
    f.write(ctf)  # Save the trace so it can be inspected in chrome://tracing
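
# For reference: the `updateTarget(targetOps, sess)` call used during training applies a set
# of pre-built assign ops that blend each target-network variable toward its main-network
# counterpart by the 'tau' ratio. A minimal sketch of how such ops are typically constructed
# in TF1-style double-DQN code (an assumption for illustration, not this repository's actual
# helper):
#
#   def updateTargetGraph(tfVars, tau):
#       # first half of tfVars: main network, second half: target network
#       total_vars = len(tfVars)
#       op_holder = []
#       for idx, var in enumerate(tfVars[0:total_vars // 2]):
#           op_holder.append(tfVars[idx + total_vars // 2].assign(
#               tau * var.value() + (1 - tau) * tfVars[idx + total_vars // 2].value()))
#       return op_holder
#
#   def updateTarget(op_holder, sess):
#       for op in op_holder:
#           sess.run(op)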