コード例 #1
0
ファイル: Predictor_Parallel.py プロジェクト: zsq007/OCR2Text
    def _build_residual_classifier(self, split_idx):
        '''
        Build the residual classifier graph for one input split.

        Reads ``self.input_splits[split_idx]``, passes it through
        ``self.nb_layers`` residual blocks, averages the result over axis 2,
        optionally applies a bidirectional LSTM (when ``self.use_lstm`` is
        truthy) and finally a linear projection with ``self.nb_class`` outputs.

        The projected tensor is appended to ``self.output``; the list is
        created on the first call. Nothing is returned.
        '''
        features = self.input_splits[split_idx]

        for layer_idx in range(self.nb_layers):
            if layer_idx == 0:
                # The first layer uses the dedicated entry block, no resampling.
                features = OptimizedResBlockDisc1(
                    features, self.nb_emb, self.output_dim, resample=None)
                continue

            # Odd-numbered layers request 'down' resampling, even ones none.
            # Original author's note on the resulting sizes:
            # [60, 30] --> [30, 15] --> [15, 7] --> [7, 3]
            resample_mode = 'down' if layer_idx % 2 == 1 else None
            features = resblock(
                'ResBlock%d' % layer_idx,
                self.output_dim,
                self.output_dim,
                self.filter_size,
                features,
                resample_mode,
                self.is_training_ph,
                use_bn=self.use_bn,
                r=self.residual_connection,
            )
            # NOTE: a non-local 'self-attention' block after odd layers was
            # tried here by the original author and is currently disabled.

        # Aggregate the conv feature maps by averaging over axis 2.
        # (Original author's idea: a more clever attention mechanism could
        # weight the contributions instead.)
        features = tf.reduce_mean(features, axis=[2])

        linear_in_dim = self.output_dim
        if self.use_lstm:
            features = lib.ops.LSTM.bilstm(
                'BILSTM', self.output_dim, features, tf.shape(features)[1])
            # With the BiLSTM enabled the linear layer is given twice the width.
            linear_in_dim = self.output_dim * 2

        projected = lib.ops.Linear.linear(
            'AMOutput', linear_in_dim, self.nb_class, features)

        # One entry per call is accumulated in self.output.
        if hasattr(self, 'output'):
            self.output += [projected]
        else:
            self.output = [projected]
コード例 #2
0
ファイル: Predictor_Parallel.py プロジェクト: zsq007/OCR2Text
    def _build_seq2seq(self, split_idx):
        '''
        Build the encoder/decoder graph for one input split.

        Reads ``self.input_splits[split_idx]``, passes it through
        ``self.nb_layers`` residual blocks (none of which resample), averages
        the result over axis 2, then runs ``BiLSTMEncoder`` followed by
        ``AttentionDecoder`` and a linear projection with ``self.nb_class``
        outputs.

        The projected tensor is appended to ``self.output``; the list is
        created on the first call. Nothing is returned.
        '''
        features = self.input_splits[split_idx]

        for layer_idx in range(self.nb_layers):
            if layer_idx == 0:
                # The first layer uses the dedicated entry block.
                features = OptimizedResBlockDisc1(
                    features, self.nb_emb, self.output_dim, resample=None)
                continue

            features = resblock(
                'ResBlock%d' % layer_idx,
                self.output_dim,
                self.output_dim,
                self.filter_size,
                features,
                None,  # no resampling in any layer
                self.is_training_ph,
                use_bn=self.use_bn,
                r=self.residual_connection,
            )

        # Aggregate the conv feature maps by averaging over axis 2.
        # (Original author's idea: a more clever attention mechanism could
        # weight the contributions instead.)
        features = tf.reduce_mean(features, axis=[2])

        encoder_outputs, encoder_states = BiLSTMEncoder(
            'Encoder', self.output_dim, features, self.max_size[0])

        # The literal 8 is the decoder's fourth positional argument; presumably
        # the number of decoding steps -- TODO confirm against AttentionDecoder.
        decoder_outputs, _ = AttentionDecoder(
            'Decoder', encoder_outputs, encoder_states, 8)

        projected = lib.ops.Linear.linear(
            'MapToOutputEmb', self.output_dim * 2, self.nb_class, decoder_outputs)

        # One entry per call is accumulated in self.output.
        if hasattr(self, 'output'):
            self.output += [projected]
        else:
            self.output = [projected]
コード例 #3
0
    def _build_beam_seq2seq(self, split_idx, mode):
        '''
        Build a seq2seq graph that uses beam search at inference stage.
        Only the sampled token is passed down to the next step at the inference stage.

        mode == 'training'  : input is self.input_splits[split_idx]; the decoder
                              output and the two auxiliary outputs are appended to
                              self.output, self.mnist_output and
                              self.nb_digits_output (lists created on first call).
        mode == 'inference' : input is self.inference_input_ph (split_idx is not
                              used); the decoder output is stored in
                              self.inference_output.
        Any other mode raises ValueError('unknown mode').
        '''
        if mode not in ('training', 'inference'):
            raise ValueError('unknown mode')
        is_training = mode == 'training'

        net = self.input_splits[split_idx] if is_training else self.inference_input_ph

        with tf.variable_scope('pretrain_effect_zone'):
            for layer_idx in range(self.nb_layers):
                if layer_idx == 0:
                    net = OptimizedResBlockDisc1(
                        net, self.nb_emb, self.output_dim, resample=None)
                    continue

                # Odd-numbered layers double the channel count and apply
                # self.resample; even-numbered layers keep both unchanged.
                in_channels = net.get_shape().as_list()[-1]
                is_odd_layer = layer_idx % 2 == 1
                net = resblock(
                    'ResBlock%d' % layer_idx,
                    in_channels,
                    in_channels * 2 if is_odd_layer else in_channels,
                    self.filter_size,
                    net,
                    self.resample if is_odd_layer else None,
                    self.is_training_ph,
                    use_bn=self.use_bn,
                    r=self.residual_connection,
                )

            net = tf.nn.relu(net)

            # Static shape of the 4-D feature map before it is flattened.
            static_shape = net.get_shape().as_list()
            nb_channels = static_shape[-1]

            # Swap axes 1 and 2, then merge them into a single sequence axis.
            swapped = tf.transpose(net, [0, 2, 1, 3])
            seq_len = np.prod(static_shape[1:3])
            net = tf.reshape(swapped, [-1, seq_len, nb_channels])

            # Auxiliary head on the fully flattened features.
            flat_dim = np.prod(static_shape[1:])
            mnist_output = lib.ops.Linear.linear(
                'mnist_output',
                flat_dim,
                self.nb_mnist_class,
                tf.reshape(net, [-1, flat_dim]),
            )

        # auxiliary loss on length: linear head on features summed over axis 1
        nb_digits_output = lib.ops.Linear.linear(
            'NBDigitsLinear',
            nb_channels,
            self.nb_length_class,
            tf.reduce_sum(net, axis=1),
        )

        encoder_outputs, encoder_states = BiLSTMEncoder(
            'Encoder', nb_channels, net, np.prod(static_shape[1:3]))
        # Original author's note: feature dim from BiLSTMEncoder is
        # nb_channels * 2.
        decoder_outputs, _ = BeamAttDecoder(
            'Decoder',
            encoder_outputs,
            encoder_states,
            self.nb_max_digits,
            self.nb_class,
            mode=mode,
            beam_size=self.beam_size,
        )

        if not is_training:
            # Original author's note on the layout of this value:
            # [0]: beam tokens, [1] marginal logprob, [2] attention_weights
            self.inference_output = decoder_outputs
            return

        # translation output plus the auxiliary heads, one entry per call
        if hasattr(self, 'output'):
            self.output += [decoder_outputs]
            self.mnist_output += [mnist_output]
            self.nb_digits_output += [nb_digits_output]
        else:
            self.output = [decoder_outputs]
            self.mnist_output = [mnist_output]
            self.nb_digits_output = [nb_digits_output]
コード例 #4
0
    def _build_seq2seq(self, split_idx, mode):
        '''
        Build a fairly basic seq2seq graph without beam search
        that passes attention vector to the next timestep at the decoding stage.

        mode == 'training'  : input is self.input_splits[split_idx]; the projected
                              decoder output and the two auxiliary outputs are
                              appended to self.output, self.mnist_output and
                              self.nb_digits_output (lists created on first call).
        mode == 'inference' : input is self.inference_input_ph (split_idx is not
                              used); results are stored in self.inference_output
                              and self.inference_att_weights.
        Any other mode raises ValueError('unknown mode').
        '''
        if mode not in ('training', 'inference'):
            raise ValueError('unknown mode')
        is_training = mode == 'training'

        net = self.input_splits[split_idx] if is_training else self.inference_input_ph

        with tf.variable_scope('pretrain_effect_zone'):
            for layer_idx in range(self.nb_layers):
                if layer_idx == 0:
                    net = OptimizedResBlockDisc1(
                        net, self.nb_emb, self.output_dim, resample=None)
                    continue

                # Odd-numbered layers double the channel count and apply
                # self.resample; even-numbered layers keep both unchanged.
                in_channels = net.get_shape().as_list()[-1]
                is_odd_layer = layer_idx % 2 == 1
                net = resblock(
                    'ResBlock%d' % layer_idx,
                    in_channels,
                    in_channels * 2 if is_odd_layer else in_channels,
                    self.filter_size,
                    net,
                    self.resample if is_odd_layer else None,
                    self.is_training_ph,
                    use_bn=self.use_bn,
                    r=self.residual_connection,
                )

            net = tf.nn.relu(net)

            # Static shape of the 4-D feature map before it is flattened.
            static_shape = net.get_shape().as_list()
            nb_channels = static_shape[-1]

            # Swap axes 1 and 2, then merge them into a single sequence axis.
            swapped = tf.transpose(net, [0, 2, 1, 3])
            seq_len = np.prod(static_shape[1:3])
            net = tf.reshape(swapped, [-1, seq_len, nb_channels])

            # Auxiliary head on the fully flattened features.
            flat_dim = np.prod(static_shape[1:])
            mnist_output = lib.ops.Linear.linear(
                'mnist_output',
                flat_dim,
                self.nb_mnist_class,
                tf.reshape(net, [-1, flat_dim]),
            )

        # auxiliary loss on length: linear head on features summed over axis 1
        nb_digits_output = lib.ops.Linear.linear(
            'NBDigitsLinear',
            nb_channels,
            self.nb_length_class,
            tf.reduce_sum(net, axis=1),
        )

        encoder_outputs, encoder_states = BiLSTMEncoder(
            'Encoder', nb_channels, net, np.prod(static_shape[1:3]))
        # Original author's note: feature dim from BiLSTMEncoder is
        # nb_channels * 2, which is the input width given to the projection.
        decoder_outputs, _, att_weights = AttentionDecoder(
            'Decoder',
            encoder_outputs,
            encoder_states,
            self.nb_max_digits,
        )

        # translation output
        projected = lib.ops.Linear.linear(
            'MapToOutputEmb', nb_channels * 2, self.nb_class, decoder_outputs)

        if not is_training:
            self.inference_output = projected
            self.inference_att_weights = att_weights
            return

        # translation output plus the auxiliary heads, one entry per call
        if hasattr(self, 'output'):
            self.output += [projected]
            self.mnist_output += [mnist_output]
            self.nb_digits_output += [nb_digits_output]
        else:
            self.output = [projected]
            self.mnist_output = [mnist_output]
            self.nb_digits_output = [nb_digits_output]