Example #1
    def TestPwdProb(
            self,
            TestString):  # calculate the probability of a given test string
        # If the string length equals RNN.batch_size, calculate directly;
        # if it is longer, calculate the first batch_size positions and
        # then multiply in the remaining positions.
        if '«' not in TestString:
            TestString = TestString + '«'

        # preparation
        idx2char = Data.GetCharsSet()
        char2idx = {c: i for i, c in enumerate(idx2char)}
        Overlen = len(TestString) - RNN.batch_size - 1
        MaxTimes = 10**50  # guess-number cap returned when the probability is zero
        # Initial Calculate
        x_data, y_data = Data.Pwd2Batch(TestString, RNN.max_sequence_length)
        x_idx = [[char2idx[c] for c in x_data[k]]
                 for k in range(RNN.max_sequence_length)]
        result = FirstCharProb.GetProb(TestString[0])
        # feed the first batch_size positions into the RNN
        Probability = self.sess.run(self.outputs, feed_dict={self.X: x_idx})
        Probability = self.ProbSoftMax(
            Probability.tolist())  # convert raw outputs into probabilities
        # display the predicted characters:
        # for i in range(RNN.batch_size):
        #     for j in range(RNN.max_sequence_length):
        #         print(idx2char[Probability[i][j].index(max(Probability[i][j]))], end='')
        #     print("", end=" ")
        # multiply in the probability of each position in the first window
        for i in range(RNN.max_sequence_length):
            result = result * Probability[i][i][char2idx[TestString[i + 1]]]
        if Overlen == 0:  # the string fits into a single batch
            if result == 0:
                return MaxTimes
            return 1 / result
        # handle the positions beyond batch_size
        NextString = TestString[Overlen:]
        x_data, y_data = Data.Pwd2Batch(NextString, RNN.max_sequence_length)
        x_idx = [[char2idx[c] for c in x_data[k]]
                 for k in range(RNN.max_sequence_length)]

        Probability = self.sess.run(self.outputs, feed_dict={self.X: x_idx})
        Probability = self.ProbSoftMax(Probability.tolist())

        for i in range(Overlen):
            pos = RNN.max_sequence_length - i - 1
            result = result * Probability[pos][pos][char2idx[NextString[pos + 1]]]
        if result == 0:
            return MaxTimes
        return 1 / result
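
TestPwdProb relies on self.ProbSoftMax, which is not shown on this page. A minimal sketch of what it plausibly does, assuming the raw outputs are logits of shape [batch_size, max_sequence_length, num_classes] and the method merely normalises them per position (the real implementation may differ):

    import numpy as np

    def ProbSoftMax(self, logits):
        # Hypothetical reconstruction: numerically stable softmax over the
        # class axis, returned as nested lists so Probability[i][j][k]
        # addresses batch i, position j, character class k.
        a = np.asarray(logits, dtype=np.float64)
        a = a - a.max(axis=-1, keepdims=True)  # guard against overflow in exp
        e = np.exp(a)
        return (e / e.sum(axis=-1, keepdims=True)).tolist()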
Example #2
    def Predict(self, Last, depth=0, OutFile=None):
        if depth > 12:
            return
        depth = depth + 1
        idx2char = Data.GetCharsSet()
        char2idx = {c: i for i, c in enumerate(idx2char)}
        x_idx = [char2idx[c] for c in Last]
        x_idx = np.reshape(x_idx, [-1, 10])

        Probability = self.sess.run(self.outputs, feed_dict={self.X: x_idx})
        Probability = Probability[0][len(Probability[0]) - 1].tolist()

        # shift the window one character to the left; the last slot is a
        # placeholder that is overwritten below
        temp = list(Last[1:10]) + ['a']
        # recurse on the last character
        width = 2  # try the two most probable next characters
        for i in range(1, width + 1):
            temp[9] = idx2char[FindIndex(Probability, i)]
            if temp[9] == ' ':
                continue
            if temp[9] != '«':
                self.Predict(deepcopy(temp), depth, OutFile)
            else:
                result = ''.join(temp)
                result = result.replace(" ", "")
                result = result[:result.index('«')]
                if OutFile is None:
                    print(result)
                else:
                    OutFile.write(str(result) + '\n')
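
Predict calls FindIndex, which is also not shown here. Under the assumption that FindIndex(Probability, i) returns the index of the i-th most probable character (so the loop over width walks the top-2 candidates), a minimal sketch:

    def FindIndex(values, rank):
        # Hypothetical helper: index of the rank-th largest entry;
        # rank=1 is the argmax, rank=2 the runner-up, and so on.
        order = sorted(range(len(values)), key=lambda k: values[k],
                       reverse=True)
        return order[rank - 1]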
Example #3
 def Train(self):
     print("Training...")
     saver = tf.train.Saver(max_to_keep=1)
     merged_summary_op = tf.summary.merge_all()
     writer = tf.summary.FileWriter(RNN.tensorboard_route, self.sess.graph)
     if RNN.is_new_train:
         self.sess.run(tf.global_variables_initializer())
     else:
         saver.restore(sess=self.sess, save_path="../Model/model.ckpt")
         self.RestoreSteps()
     # Start
     for j in range(len(Data.batches)):
         x_idx, y_idx = Data.GetBatch()
         self.sess.run(self.train, feed_dict={self.X: x_idx, self.Y: y_idx})
         if j % 2000 == 0:
             summary_str, loss = self.sess.run(
                 [merged_summary_op, self.loss],
                 feed_dict={
                     self.X: x_idx,
                     self.Y: y_idx
                 })
             # log to TensorBoard
             writer.add_summary(summary_str, RNN.total_step)
             RNN.total_step = RNN.total_step + 2000
             self.SaveSteps()
             self.SaveModel()
             # save model
             print(j, "loss:", loss)
             saver.save(sess=self.sess, save_path="../Model/model.ckpt")
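
This Train variant calls self.RestoreSteps() and self.SaveSteps() to keep RNN.total_step consistent across restarts, so the TensorBoard curve continues where it left off. Neither method is shown; a minimal sketch under the assumption that the counter is persisted to a plain text file (the path is hypothetical):

    def SaveSteps(self):
        # Hypothetical: persist the global step next to the checkpoint.
        with open("../Model/steps.txt", "w") as f:
            f.write(str(RNN.total_step))

    def RestoreSteps(self):
        # Hypothetical counterpart: reload the counter before resuming.
        with open("../Model/steps.txt") as f:
            RNN.total_step = int(f.read())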
 def Train(self):
     print("Training...")
     saver = tf.train.Saver(max_to_keep=1)
     merged_summary_op = tf.summary.merge_all()
     writer = tf.summary.FileWriter(self.tensorboard_route, self.sess.graph)
     self.sess.run(tf.global_variables_initializer())
     # Start
     # load the data
     for j in range(len(Data.batches)):
         x_idx, y_idx = Data.GetBatch()
         # run x_idx through the bottleneck for further processing
         bottleneck_x = self.run_bottleneck_on_x(x_idx)
         self.sess.run(self.train,
                       feed_dict={
                           self.bottleneck_input: bottleneck_x,
                           self.bottleneck_truth: y_idx
                       })
         if j % 2000 == 0:
             summary_str, loss = self.sess.run(
                 [merged_summary_op, self.loss],
                 feed_dict={
                     self.bottleneck_input: bottleneck_x,
                     self.bottleneck_truth: y_idx
                 })
             # log to TensorBoard
             writer.add_summary(summary_str, self.total_step)
             self.total_step = self.total_step + 2000
             self.SaveSteps()
             #self.SaveModel()
             # save model
             print(j, "loss:", loss)
             saver.save(sess=self.sess, save_path="../Model/pimodel.ckpt")
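
The second Train variant never feeds x_idx to the network directly: self.run_bottleneck_on_x first maps the index batch to feature vectors, which then enter through self.bottleneck_input. That method is not shown; a heavily hedged sketch, assuming a pretrained character RNN whose hidden-layer tensor is exposed (both `pretrained` and `hidden_out` are hypothetical names):

    def run_bottleneck_on_x(self, x_idx):
        # Hypothetical: run the batch through the frozen base model and
        # return its hidden activations as the bottleneck features.
        return self.pretrained.sess.run(
            self.pretrained.hidden_out,
            feed_dict={self.pretrained.X: x_idx})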
    def PaintProb():
        fig = plt.figure(1, figsize=(16, 5))
        ax1 = plt.subplot(111)
        data = []
        CharSet = Data.GetCharsSet()
        for char in CharSet:
            data.append(FirstCharProb.FirstCharDicProb[char])
        data = np.array(data)
        width = 1
        x_bar = np.arange(len(FirstCharProb.FirstCharDicProb))
        ax1.bar(x_bar,
                data,
                align="center",
                alpha=0.5,
                width=width,
                color="green")

        ax1.set_xticks(x_bar)
        ax1.set_xticklabels(CharSet)
        ax1.set_ylabel("Probability")
        ax1.set_title("First Char Probability")
        ax1.grid(True)
        ax1.set_ylim(0, max(FirstCharProb.FirstCharDicProb.values()))
        plt.show()
 def TransNum2Prob():  # convert raw counts into probabilities
     CharSet = Data.GetCharsSet()
     TotalNum = 0
     for char in CharSet:
         TotalNum += FirstCharProb.FirstCharDicNum[char]
     for char in CharSet:
         FirstCharProb.FirstCharDicProb[char] = (
             FirstCharProb.FirstCharDicNum[char] / TotalNum)
 def LearnFormFile(Route):
     file = open(Route, 'r')
     CharSet = Data.GetCharsSet()
     TotalLines = Data.GetLinesNum(Route)
     step = max(1, TotalLines // 30)  # progress interval; max() avoids a modulo-by-zero on small files
     FinishedLines = 0
     # record the number of each char shows up
     while True:
         line = file.readline()
         if len(line) <= 0:
             break
         if line[0] not in CharSet:
             continue
         FirstCharProb.FirstCharDicNum[line[0]] += 1
         FinishedLines += 1
         if FinishedLines % step == 0:
             print(100 * FinishedLines / float(TotalLines), "%")
     file.close()
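
Taken together with init() at the bottom of this page, the FirstCharProb helpers form a small pipeline: zero the counters, count first characters from a training file, normalise to probabilities, then plot. A usage sketch (the file path is made up for illustration):

    FirstCharProb.init()                              # zero all counters
    FirstCharProb.LearnFormFile("../Data/train.txt")  # count first chars
    FirstCharProb.TransNum2Prob()                     # counts -> probabilities
    FirstCharProb.PaintProb()                         # bar chart of the result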
    def CreateNetwork(self):
        # new network inputs
        id2char = Data.GetCharsSet()
        num_classes = len(id2char)
        learning_rate = 0.005
        self.bottleneck_input = tf.placeholder(tf.float32,
                                               [None, self.hidden_size],
                                               name='HiddenOut')
        # ground-truth labels
        self.bottleneck_truth = tf.placeholder(
            tf.int32, [self.batch_size, self.max_sequence_length])
        # define a basic LSTM cell
        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(self.hidden_size,
                                                 forget_bias=1.0,
                                                 state_is_tuple=True)
        # Dropout Layer
        lstm_cell = tf.nn.rnn_cell.DropoutWrapper(cell=lstm_cell,
                                                  input_keep_prob=1.0,
                                                  output_keep_prob=1.0)
        # multi-LSTM cell
        mlstm_cell = tf.nn.rnn_cell.MultiRNNCell(
            [self.LstmCell(self.hidden_size) for _ in range(self.layer_num)],
            state_is_tuple=True)
        init_state = mlstm_cell.zero_state(batch_size=self.batch_size,
                                           dtype=tf.float32)
        # create the network
        # reshape into [batch, time, features]; the 8s hard-code
        # batch_size and max_sequence_length
        std_inputs = tf.reshape(self.bottleneck_input,
                                [8, 8, self.hidden_size])
        x_length = [1, 2, 3, 4, 5, 6, 7, 8]  # per-example sequence lengths
        outputs, _state = tf.nn.dynamic_rnn(mlstm_cell,
                                            inputs=std_inputs,
                                            sequence_length=x_length,
                                            initial_state=init_state,
                                            dtype=tf.float32,
                                            time_major=False)
        X_for_fc = tf.reshape(outputs, [-1, self.hidden_size], name="output")

        # define a fully-connected layer
        outputs = tf.contrib.layers.fully_connected(X_for_fc,
                                                    num_classes,
                                                    activation_fn=None)
        # reshape out for sequence_loss
        self.outputs = tf.reshape(outputs, [self.batch_size, -1, num_classes])
        #define loss
        weights = tf.ones([self.batch_size, self.max_sequence_length])
        sequence_loss = tf.contrib.seq2seq.sequence_loss(
            logits=self.outputs,
            targets=self.bottleneck_truth,
            weights=weights)
        self.loss = tf.reduce_mean(sequence_loss)
        tf.summary.scalar('loss', self.loss)
        self.train = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(self.loss)
        self.sess = tf.Session()
        print("OK")
    def Test(self, route):
        print('Testing...')
        data_file = open(route, 'r')
        resultlist = []
        TotalLines = Data.GetLinesNum(route)
        FinishedLines = 0
        while 1:
            line = data_file.readline()
            if len(line) <= 0:
                break
            while line[0] == " ":
                line = line[1:len(line) - 1]
            line = line.replace("\n", "")
            line = line.replace("\r", "")
            line = line + '«'
            result = self.TestPwdProb(line)
            resultlist.append(result)
            FinishedLines = FinishedLines + 1
            if FinishedLines % 1000 == 0:
                print(str(100 * FinishedLines / float(TotalLines)) + str("%"))

        x = np.arange(0, 25, 0.1)
        y = []

        for i in x:
            count = 0
            for j in resultlist:
                if j < pow(10, i):
                    count += 1
            y.append(count / len(resultlist))
        # test running time
        # stop = time()
        # print("finished at " + str(stop) + " seconds")

        plt.plot(x, y)
        plt.title('Guess Result')
        plt.xlabel('Guesses 10^x')
        plt.ylabel('Percent guessed')
        plt.show()
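
The double loop that fills y computes, for each exponent x, the fraction of passwords whose guess number (1/probability) falls below 10^x, i.e. an empirical CDF on a log scale. The same curve can be produced without the quadratic scan; an equivalent numpy sketch, assuming resultlist holds positive guess numbers:

    import numpy as np

    guesses = np.sort(np.asarray(resultlist, dtype=np.float64))
    x = np.arange(0, 25, 0.1)
    # searchsorted with side='left' counts entries strictly below 10**x,
    # matching the `j < pow(10, i)` test above
    y = np.searchsorted(guesses, 10.0 ** x, side='left') / len(guesses)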
Example #10
    def CreateNetwork(self):
        print("Creating Network.....")
        tf.set_random_seed(777)  # reproducibility
        idx2char = Data.GetCharsSet()
        # hyperparameters
        hidden_size = 50  # RNN output size
        num_classes = len(idx2char)  # final output size (RNN or softmax, etc.)
        learning_rate = 0.001
        layer_num = 1

        self.X = tf.placeholder(tf.int32,
                                [RNN.batch_size, RNN.max_sequence_length],
                                name="X")  # X Data
        self.Y = tf.placeholder(tf.int32,
                                [RNN.batch_size, RNN.max_sequence_length],
                                name="Y")  # Y label

        x_one_hot = tf.one_hot(
            self.X, num_classes,
            name="x_one_hot")  # one hot: 1 -> 0 1 0 0 0 0 0 0 0 0
        # LSTM cell
        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_size,
                                                 forget_bias=1.0,
                                                 state_is_tuple=True)
        # Dropout Layer
        lstm_cell = tf.nn.rnn_cell.DropoutWrapper(cell=lstm_cell,
                                                  input_keep_prob=1.0,
                                                  output_keep_prob=1.0)
        # multi-LSTM cell
        mlstm_cell = tf.nn.rnn_cell.MultiRNNCell(
            [self.LstmCell(hidden_size) for _ in range(layer_num)],
            state_is_tuple=True)

        init_state = mlstm_cell.zero_state(batch_size=RNN.batch_size,
                                           dtype=tf.float32)
        x_length = [1, 2, 3, 4, 5, 6, 7, 8]  # per-example sequence lengths
        outputs, _state = tf.nn.dynamic_rnn(mlstm_cell,
                                            inputs=x_one_hot,
                                            sequence_length=x_length,
                                            initial_state=init_state,
                                            dtype=tf.float32,
                                            time_major=False)
        # FC layer
        X_for_fc = tf.reshape(outputs, [-1, hidden_size], name="output")
        outputs = tf.contrib.layers.fully_connected(X_for_fc,
                                                    num_classes,
                                                    activation_fn=None)

        # reshape out for sequence_loss
        self.outputs = tf.reshape(outputs, [RNN.batch_size, -1, num_classes])

        weights = tf.ones([RNN.batch_size, RNN.max_sequence_length])
        # http://blog.csdn.net/appleml/article/details/54017873
        sequence_loss = tf.contrib.seq2seq.sequence_loss(logits=self.outputs,
                                                         targets=self.Y,
                                                         weights=weights)
        self.loss = tf.reduce_mean(sequence_loss)
        tf.summary.scalar('loss', self.loss)
        self.train = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(self.loss)
        self.sess = tf.Session()
        print("OK")
 def init():  # reset the first-character counters
     CharSet = Data.GetCharsSet()
     for char in CharSet:
         FirstCharProb.FirstCharDicNum[char] = 0