def _build_model(self):
        if args.use_local_impl:
            cell = NTMCell(args.num_layers, args.num_units, args.num_memory_locations, args.memory_size,
                           args.num_read_heads, args.num_write_heads, addressing_mode='content_and_location',
                           shift_range=args.conv_shift_range, reuse=False, output_dim=5,
                           clip_value=args.clip_value, init_mode=args.init_mode)
        else:
            def single_cell(num_units):
                return tf.contrib.rnn.BasicLSTMCell(num_units, forget_bias=1.0)

            controller = tf.contrib.rnn.MultiRNNCell(
                [single_cell(args.num_units) for _ in range(args.num_layers)])

            cell = NTMCell(controller, args.num_memory_locations, args.memory_size,
                           args.num_read_heads, args.num_write_heads, shift_range=args.conv_shift_range,
                           output_dim=5,
                           clip_value=args.clip_value)

        output_sequence, _ = tf.nn.dynamic_rnn(
            cell=cell,
            inputs=self.inputs,
            time_major=False,
            dtype=tf.float32,
            initial_state=None)

        self.output_logits = output_sequence[:, self.max_seq_len:, :]
        self.outputs = tf.sigmoid(self.output_logits)
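A minimal sketch of how a model built this way might be driven in a TF1 session; `model`, `batch_of_sequences`, and the placeholder shape are assumptions for illustration, not part of the example above.

import numpy as np
import tensorflow as tf

# Assumed: `model` is an instance whose _build_model() defined model.inputs
# (a float32 placeholder of shape [batch, time, input_dim]) and model.outputs.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    batch_of_sequences = np.random.randint(0, 2, size=(32, 20, 9)).astype(np.float32)
    predictions = sess.run(model.outputs,
                           feed_dict={model.inputs: batch_of_sequences})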
Example #2
    def _build_model(self):
        if args.mann == 'none':

            def single_cell(num_units):
                return tf.contrib.rnn.BasicLSTMCell(num_units, forget_bias=1.0)

            cell = tf.contrib.rnn.OutputProjectionWrapper(
                tf.contrib.rnn.MultiRNNCell([
                    single_cell(args.num_units) for _ in range(args.num_layers)
                ]),
                args.num_bits_per_vector,
                activation=None)

            initial_state = tuple(
                tf.contrib.rnn.LSTMStateTuple(
                    c=expand(tf.tanh(learned_init(args.num_units)),
                             dim=0,
                             N=args.batch_size),
                    h=expand(tf.tanh(learned_init(args.num_units)),
                             dim=0,
                             N=args.batch_size))
                for _ in range(args.num_layers))

        elif args.mann == 'ntm':
            cell = NTMCell(args.num_layers,
                           args.num_units,
                           args.num_memory_locations,
                           args.memory_size,
                           args.num_read_heads,
                           args.num_write_heads,
                           addressing_mode='content_and_location',
                           shift_range=args.conv_shift_range,
                           reuse=False,
                           output_dim=args.num_bits_per_vector,
                           clip_value=args.clip_value,
                           init_mode=args.init_mode)

            initial_state = cell.zero_state(args.batch_size, tf.float32)

        output_sequence, _ = tf.nn.dynamic_rnn(cell=cell,
                                               inputs=self.inputs,
                                               time_major=False,
                                               initial_state=initial_state)

        if args.task == 'copy':
            self.output_logits = output_sequence[:, self.max_seq_len + 1:, :]
        elif args.task == 'associative_recall':
            self.output_logits = output_sequence[
                :, 3 * (self.max_seq_len + 1) + 2:, :]

        if args.task in ('copy', 'associative_recall'):
            self.outputs = tf.sigmoid(self.output_logits)
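The branch for `args.mann == 'none'` relies on two helpers, `expand` and `learned_init`, that the snippet does not define. A plausible sketch of them (an assumption about their behaviour, not code from the source) is:

import tensorflow as tf

def expand(x, dim, N):
    # Replicate a tensor N times along a new axis `dim`, e.g. to tile a
    # learned per-unit initial state across the batch dimension.
    return tf.concat([tf.expand_dims(x, dim) for _ in range(N)], axis=dim)

def learned_init(num_units):
    # A trainable vector of length `num_units`, realised as a bias-free dense
    # layer applied to a constant input, used as a learned initial state.
    return tf.squeeze(
        tf.contrib.layers.fully_connected(tf.ones([1, 1]),
                                          num_units,
                                          activation_fn=None,
                                          biases_initializer=None))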
Example #3
    def _build_model(self):
        # Model definition
        cell = NTMCell(
            controller_layers=1,
            controller_units=100,
            memory_size=128,
            memory_vector_dim=20,  # size of each memory cell
            read_head_num=1,
            write_head_num=1,  # number of read/write heads
            addressing_mode='content_and_location',
            shift_range=1,
            reuse=False,
            output_dim=8,
            clip_value=20,
            init_mode='constant')

        # Run with tf.nn.dynamic_rnn.
        # Input: (batch, seq_len, dim); output: (batch, seq_len, num_units)
        output_sequence, _ = tf.nn.dynamic_rnn(cell=cell,
                                               inputs=self.inputs,
                                               time_major=False,
                                               dtype=tf.float32,
                                               initial_state=None)

        self.output_logits = output_sequence[:, self.max_seq_len + 1:, :]
        self.output = tf.sigmoid(self.output_logits)
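A hedged continuation of this _build_model (not in the source): a typical copy-task loss and train op built on the logits above; the placeholder shape, learning rate, and optimizer choice are assumptions.

        # Assumed target placeholder of shape (batch, time, num_bits).
        self.targets = tf.placeholder(tf.float32, [None, None, 8])
        # Sigmoid cross-entropy between output logits and target bit vectors.
        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=self.targets,
                                                    logits=self.output_logits))
        self.train_op = tf.train.RMSPropOptimizer(1e-4).minimize(self.loss)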
Example #4
    def _build_encoder_cell(self,
                            hparams,
                            num_layers,
                            num_residual_layers,
                            base_gpu=0):
        """Build a multi-layer RNN cell that can be used by encoder."""

        if hparams.model == 'model3':
            if hparams.mann == 'ntm':
                return NTMCell(hparams.num_layers,
                               hparams.num_units,
                               use_att_memory=False,
                               att_memory=False,
                               att_memory_size=None,
                               att_memory_vector_dim=None,
                               use_ext_memory=True,
                               ext_memory_size=hparams.num_memory_locations,
                               ext_memory_vector_dim=hparams.memory_unit_size,
                               ext_read_head_num=hparams.read_heads,
                               ext_write_head_num=hparams.write_heads,
                               dropout=hparams.dropout,
                               batch_size=hparams.batch_size,
                               mode=self.mode,
                               shift_range=1,
                               output_dim=hparams.num_units,
                               reuse=False,
                               record_w_history=hparams.record_w_history)
            elif hparams.mann == 'dnc':
                access_config = {
                    'memory_size': hparams.num_memory_locations,
                    'word_size': hparams.memory_unit_size,
                    'num_reads': hparams.read_heads,
                    'num_writes': hparams.write_heads
                }
                controller_config = {
                    'num_units': hparams.num_units,
                    'num_layers': hparams.num_layers
                }

                return DNC(access_config, controller_config, hparams.num_units,
                           20, hparams.dropout, self.mode, hparams.batch_size)
        else:
            return model_helper.create_rnn_cell(
                unit_type=hparams.unit_type,
                num_units=hparams.num_units,
                num_layers=num_layers,
                num_residual_layers=num_residual_layers,
                forget_bias=hparams.forget_bias,
                dropout=hparams.dropout,
                num_gpus=hparams.num_gpus,
                mode=self.mode,
                base_gpu=base_gpu,
                single_cell_fn=self.single_cell_fn,
                num_proj=None)
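For context, a hedged sketch of how a cell returned by this method is typically unrolled over the embedded source sequence in an nmt-style encoder; `encoder_emb_inp`, `source_sequence_length`, and the call site are assumptions, not shown in the source.

        cell = self._build_encoder_cell(hparams, hparams.num_layers,
                                        hparams.num_residual_layers)
        encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
            cell,
            encoder_emb_inp,  # (batch, src_len, embed_dim), assumed
            sequence_length=source_sequence_length,
            dtype=tf.float32,
            time_major=False)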
Example #5
    def __init__(self):
        super().__init__()

        ntm_cell = NTMCell(controller_layers=CONTROLLER_NUM_LAYERS,
                           controller_units=CONTROLLER_NUM_UNITS_PER_LAYER,
                           memory_size=128,
                           memory_vector_dim=20,
                           read_head_num=1,
                           write_head_num=1,
                           addressing_mode='content_and_location',
                           shift_range=1,
                           output_dim=NUM_BITS_PER_VECTOR,
                           clip_value=20,
                           init_mode='constant')

        self.rnn = keras.layers.RNN(cell=ntm_cell,
                                    return_sequences=True,
                                    return_state=False,
                                    stateful=False,
                                    unroll=True)

        if MODEL_LOAD_PATH is not None:
            self.load_weights(MODEL_LOAD_PATH)
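The constructor above presumably belongs to a tf.keras.Model subclass; a minimal sketch of the matching forward pass (the call method is not shown in the source and is an assumption):

    def call(self, inputs):
        # Unroll the NTM cell over the full input sequence and squash the
        # emitted logits to per-bit probabilities.
        logits = self.rnn(inputs)  # (batch, time, NUM_BITS_PER_VECTOR)
        return tf.sigmoid(logits)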
Example #6
    def _build_model(self):
        if args.mann == 'none':

            def single_cell(num_units):
                return tf.contrib.rnn.BasicLSTMCell(num_units, forget_bias=1.0)

            cell = tf.contrib.rnn.OutputProjectionWrapper(
                tf.contrib.rnn.MultiRNNCell([
                    single_cell(args.num_units) for _ in range(args.num_layers)
                ]),
                args.num_bits_per_vector,
                activation=None)

            initial_state = tuple(
                tf.contrib.rnn.LSTMStateTuple(
                    c=expand(tf.tanh(learned_init(args.num_units)),
                             dim=0,
                             N=args.batch_size),
                    h=expand(tf.tanh(learned_init(args.num_units)),
                             dim=0,
                             N=args.batch_size))
                for _ in range(args.num_layers))

        elif args.mann == 'ntm':
            cell = NTMCell(args.num_layers,
                           args.num_units,
                           args.num_memory_locations,
                           args.memory_size,
                           args.num_read_heads,
                           args.num_write_heads,
                           addressing_mode='content_and_location',
                           shift_range=args.conv_shift_range,
                           reuse=False,
                           output_dim=args.num_bits_per_vector,
                           clip_value=args.clip_value,
                           init_mode=args.init_mode)

            initial_state = cell.zero_state(args.batch_size, tf.float32)
        elif args.mann == 'dnc':
            access_config = {
                'memory_size': args.num_memory_locations,
                'word_size': args.memory_size,
                'num_reads': args.num_read_heads,
                'num_writes': args.num_write_heads,
            }
            controller_config = {
                'hidden_size': args.num_units,
            }

            cell = DNC(access_config, controller_config,
                       args.num_bits_per_vector, args.clip_value)
            initial_state = cell.initial_state(args.batch_size)

        output_sequence, _ = tf.nn.dynamic_rnn(cell=cell,
                                               inputs=self.inputs,
                                               time_major=False,
                                               initial_state=initial_state)

        if args.task == 'copy' or args.task == 'repeat_copy':
            self.output_logits = output_sequence[:, self.max_seq_len + 1:, :]
        elif args.task == 'associative_recall':
            self.output_logits = output_sequence[
                :, 3 * (self.max_seq_len + 1) + 2:, :]
        elif args.task in ('traversal', 'shortest_path'):
            self.output_logits = output_sequence[:, -self.max_seq_len:, :]

        if args.task in ('copy', 'repeat_copy', 'associative_recall'):
            self.outputs = tf.sigmoid(self.output_logits)

        if args.task in ('traversal', 'shortest_path'):
            output_logits_split = tf.split(self.output_logits, 9, axis=2)
            self.outputs = tf.concat(
                [tf.nn.softmax(logits) for logits in output_logits_split],
                axis=2)
    def _build_model(self):
        if args.mann == 'none':

            def single_cell(num_units):
                return tf.contrib.rnn.BasicLSTMCell(num_units, forget_bias=1.0)

            cell = tf.contrib.rnn.OutputProjectionWrapper(
                tf.contrib.rnn.MultiRNNCell([
                    single_cell(args.num_units) for _ in range(args.num_layers)
                ]),
                args.num_bits_per_vector,
                activation=None)

            initial_state = tuple(
                tf.contrib.rnn.LSTMStateTuple(
                    c=expand(tf.tanh(learned_init(args.num_units)),
                             dim=0,
                             N=args.batch_size),
                    h=expand(tf.tanh(learned_init(args.num_units)),
                             dim=0,
                             N=args.batch_size))
                for _ in range(args.num_layers))

        elif args.mann == 'ntm':
            if args.use_local_impl:
                cell = NTMCell(controller_layers=args.num_layers,
                               controller_units=args.num_units,
                               memory_size=args.num_memory_locations,
                               memory_vector_dim=args.memory_size,
                               read_head_num=args.num_read_heads,
                               write_head_num=args.num_write_heads,
                               addressing_mode='content_and_location',
                               shift_range=args.conv_shift_range,
                               reuse=False,
                               output_dim=args.num_bits_per_vector,
                               clip_value=args.clip_value,
                               init_mode=args.init_mode)
            else:

                def single_cell(num_units):
                    return tf.compat.v1.nn.rnn_cell.BasicLSTMCell(
                        num_units, forget_bias=1.0)

                controller = tf.compat.v1.nn.rnn_cell.MultiRNNCell([
                    single_cell(args.num_units) for _ in range(args.num_layers)
                ])

                cell = NTMCell(controller,
                               args.num_memory_locations,
                               args.memory_size,
                               args.num_read_heads,
                               args.num_write_heads,
                               shift_range=args.conv_shift_range,
                               output_dim=args.num_bits_per_vector,
                               clip_value=args.clip_value)

        output_sequence, _ = tf.compat.v1.nn.dynamic_rnn(
            cell=cell,
            inputs=self.inputs,
            time_major=False,
            dtype=tf.float32,
            initial_state=initial_state if args.mann == 'none' else None)

        task_to_offset = {
            CopyTask.name:
                lambda: CopyTask.offset(self.max_seq_len),
            AssociativeRecallTask.name:
                lambda: AssociativeRecallTask.offset(self.max_seq_len),
            SumTask.name:
                lambda: SumTask.offset(self.max_seq_len),
            AverageSumTask.name:
                lambda: AverageSumTask.offset(self.max_seq_len,
                                              args.num_experts),
            MTATask.name:
                lambda: MTATask.offset(self.max_seq_len, args.num_experts,
                                       args.two_tuple_weight_precision,
                                       args.two_tuple_alpha_precision),
        }
        try:
            where_output_begins = task_to_offset[args.task]()
            self.output_logits = output_sequence[:, where_output_begins:, :]
        except KeyError:
            raise UnknownTaskError(
                f'No information on output slicing of model for "{args.task}" task'
            )

        # Intentionally kept in a map so that any task newly added to the library fails here with an
        # explicit message; otherwise the code fails later during training with a confusing error.
        task_to_activation = {
            CopyTask.name: tf.sigmoid,
            AssociativeRecallTask.name: tf.sigmoid,
            SumTask.name: tf.sigmoid,
            AverageSumTask.name: tf.sigmoid,
            MTATask.name: tf.sigmoid,
        }
        try:
            self.outputs = task_to_activation[args.task](self.output_logits)
        except KeyError:
            raise UnknownTaskError(
                f'No information on activation on model outputs for "{args.task}" task'
            )
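Both lookups above raise UnknownTaskError, which is not defined in the snippet; a minimal definition (an assumption) would be:

class UnknownTaskError(Exception):
    """Raised when args.task has no registered output-slicing or activation rule."""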
    def _build_decoder_cell(self, hparams, encoder_outputs, encoder_state,
                            source_sequence_length):
        """Build a RNN cell with attention mechanism that can be used by decoder."""
        attention_option = hparams.attention
        attention_architecture = hparams.attention_architecture

        if attention_architecture != "standard":
            raise ValueError("Unknown attention architecture %s" %
                             attention_architecture)

        num_units = hparams.num_units
        num_layers = hparams.num_layers
        num_residual_layers = hparams.num_residual_layers
        num_gpus = hparams.num_gpus
        beam_width = hparams.beam_width

        dtype = tf.float32

        if self.time_major:
            memory = tf.transpose(encoder_outputs, [1, 0, 2])
        else:
            memory = encoder_outputs

        if self.mode == tf.contrib.learn.ModeKeys.INFER and beam_width > 0:
            memory = tf.contrib.seq2seq.tile_batch(memory,
                                                   multiplier=beam_width)
            source_sequence_length = tf.contrib.seq2seq.tile_batch(
                source_sequence_length, multiplier=beam_width)
            encoder_state = tf.contrib.seq2seq.tile_batch(
                encoder_state, multiplier=beam_width)
            batch_size = self.batch_size * beam_width
        else:
            batch_size = self.batch_size

        if hparams.model in ('model0', 'model1', 'model2'):
            att_memory = tf.contrib.layers.fully_connected(
                memory,
                num_units,
                activation_fn=None,
                weights_initializer=tf.random_uniform_initializer(-0.1, 0.1))

            cell = NTMCell(num_layers,
                           num_units,
                           use_att_memory=True,
                           att_memory=att_memory,
                           att_memory_size=hparams.src_max_len,
                           att_memory_vector_dim=num_units,
                           use_ext_memory=(hparams.model == 'model2'),
                           ext_memory_size=hparams.num_memory_locations
                           if hparams.model == 'model2' else None,
                           ext_memory_vector_dim=hparams.memory_unit_size
                           if hparams.model == 'model2' else None,
                           ext_read_head_num=hparams.read_heads
                           if hparams.model == 'model2' else None,
                           ext_write_head_num=hparams.write_heads
                           if hparams.model == 'model2' else None,
                           dropout=hparams.dropout,
                           batch_size=batch_size,
                           mode=self.mode,
                           output_dim=num_units,
                           addressing_mode='content' if hparams.model
                           == 'model0' else 'content_and_location')

            decoder_initial_state = cell.zero_state(batch_size, dtype)

            if hparams.pass_hidden_state:
                decoder_initial_state = tuple([encoder_state] +
                                              list(decoder_initial_state[1:]))
        else:
            attention_mechanism = create_attention_mechanism(
                attention_option, num_units, memory, source_sequence_length)

            cell = model_helper.create_rnn_cell(
                unit_type=hparams.unit_type,
                num_units=num_units,
                num_layers=num_layers,
                num_residual_layers=num_residual_layers,
                forget_bias=hparams.forget_bias,
                dropout=hparams.dropout,
                num_gpus=num_gpus,
                mode=self.mode,
                single_cell_fn=self.single_cell_fn,
                num_proj=None,
                num_cells=2 if (hparams.encoder_type == "bi") else 1)

            # Only generate alignment in greedy INFER mode.
            alignment_history = (self.mode == tf.contrib.learn.ModeKeys.INFER
                                 and beam_width == 0)
            cell = tf.contrib.seq2seq.AttentionWrapper(
                cell,
                attention_mechanism,
                attention_layer_size=num_units,
                alignment_history=alignment_history,
                name="attention")

            # TODO(thangluong): do we need num_layers, num_gpus?
            cell = tf.contrib.rnn.DeviceWrapper(
                cell, model_helper.get_device_str(num_layers - 1, num_gpus))

            if hparams.pass_hidden_state:
                decoder_initial_state = cell.zero_state(
                    batch_size, dtype).clone(cell_state=encoder_state)
            else:
                decoder_initial_state = cell.zero_state(batch_size, dtype)

        return cell, decoder_initial_state
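A hedged sketch of how the returned cell and decoder_initial_state are typically consumed for training-time decoding with tf.contrib.seq2seq; `decoder_emb_inp`, `target_sequence_length`, and `tgt_vocab_size` are assumptions, not taken from the source.

        cell, decoder_initial_state = self._build_decoder_cell(
            hparams, encoder_outputs, encoder_state, source_sequence_length)
        # Teacher-forced decoding over the embedded target inputs.
        helper = tf.contrib.seq2seq.TrainingHelper(decoder_emb_inp,
                                                   target_sequence_length)
        decoder = tf.contrib.seq2seq.BasicDecoder(cell, helper,
                                                  decoder_initial_state)
        outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder)
        # Project decoder outputs to vocabulary logits.
        logits = tf.layers.dense(outputs.rnn_output, tgt_vocab_size,
                                 use_bias=False)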