Example 1
    def _predict(self, strings, beam_search=True):
        if self._translation_model:
            # Translation models expect cleaned text with id 1 appended as an
            # end-of-sentence marker.
            encoded = [
                self._encoder.encode(translation_textcleaning(string)) + [1]
                for string in strings
            ]
        else:
            encoded = self._encoder.encode(strings)
        # Pad every sequence to the batch maximum with the padding id 0.
        batch_x = pad_sentence_batch(encoded, 0)[0]

        # Pick the graph output that matches the requested decoding strategy.
        if beam_search:
            output = 'beam'
        else:
            output = 'greedy'

        r = self._execute(
            inputs=[batch_x],
            input_labels=['Placeholder'],
            output_labels=[output],
        )
        p = r[output].tolist()
        if self._translation_model:
            result = []
            for row in p:
                # Drop the padding id 0 and the end id 1 before decoding.
                result.append(
                    self._encoder.decode([i for i in row if i not in [0, 1]]))
        else:
            result = self._encoder.decode(p)
        return result
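
Both _predict above and _translate in the next example call pad_sentence_batch(encoded, 0)[0], a helper that is not shown here. Below is a minimal sketch of what such a helper typically looks like, assuming it pads every sequence to the longest one in the batch and returns the padded batch together with the original lengths; the return shape is inferred only from the [0] indexing in the snippets.

def pad_sentence_batch(sentence_batch, pad_int):
    # Sketch only: pad every sequence with pad_int up to the batch maximum
    # and return (padded_batch, original_lengths), matching the [0] indexing
    # used in the examples. The real helper may differ.
    max_len = max(len(sentence) for sentence in sentence_batch)
    padded = [
        sentence + [pad_int] * (max_len - len(sentence))
        for sentence in sentence_batch
    ]
    lengths = [len(sentence) for sentence in sentence_batch]
    return padded, lengths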
Example 2
    def _translate(self, strings, beam_search=True):
        # Clean each string, encode it, and append id 1 as the end marker.
        encoded = [
            self._tokenizer.encode(translation_textcleaning(string)) + [1]
            for string in strings
        ]
        # Choose which decoding tensor to run: beam search or greedy.
        if beam_search:
            output = self._beam
        else:
            output = self._greedy
        batch_x = pad_sentence_batch(encoded, 0)[0]
        p = self._sess.run(output, feed_dict={self._X: batch_x}).tolist()
        result = []
        for row in p:
            # Drop the padding id 0 and the end id 1 before decoding.
            result.append(
                self._tokenizer.decode([i for i in row if i not in [0, 1]]))
        return result
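
Encoding appends id 1 and padding uses id 0, which is why decoding filters both out of each output row. A toy illustration of that filtering step (the token ids here are invented):

# A row as a model might return it: real token ids, then the appended
# end id 1, then padding ids 0. Ids are made up for illustration.
row = [45, 12, 7, 1, 0, 0, 0]
cleaned = [i for i in row if i not in [0, 1]]
assert cleaned == [45, 12, 7]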
Example 3
 def _translate(self, strings):
     encoded = [
         self._encoder.encode(translation_textcleaning(string)) + [1]
         for string in strings
     ]
     batch_x = pad_sequences(encoded, padding='post', maxlen=self._maxlen)
     r = self._execute(
         inputs=[batch_x],
         input_labels=['Placeholder'],
         output_labels=['logits'],
     )
     p = r['logits']
     result = []
     for r in p:
         result.append(
             self._encoder.decode([i for i in r.tolist() if i > 0]))
     return result
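
Example 3 pads with pad_sequences(..., padding='post', maxlen=self._maxlen) instead of the batch-local helper, so every batch comes out at the same fixed width. A quick check of that behaviour, assuming the Keras utility with this signature is the one in use:

from tensorflow.keras.preprocessing.sequence import pad_sequences

encoded = [[5, 6, 1], [7, 1]]
batch_x = pad_sequences(encoded, padding='post', maxlen=5)
# batch_x is now a fixed-width integer array:
# [[5 6 1 0 0]
#  [7 1 0 0 0]]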