def _build_model(self):
    """Assemble the NTM graph: build the cell, unroll it, and attach outputs.

    Uses either the local NTM implementation or the reference one (which
    requires an externally constructed LSTM controller), then runs
    `tf.nn.dynamic_rnn` and keeps only the trailing portion of the output.
    """
    if args.use_local_impl:
        # Local implementation constructs its LSTM controller internally.
        ntm = NTMCell(args.num_layers, args.num_units,
                      args.num_memory_locations, args.memory_size,
                      args.num_read_heads, args.num_write_heads,
                      addressing_mode='content_and_location',
                      shift_range=args.conv_shift_range,
                      reuse=False,
                      output_dim=5,
                      clip_value=args.clip_value,
                      init_mode=args.init_mode)
    else:
        # Reference implementation takes a ready-made controller stack.
        lstm_layers = [
            tf.contrib.rnn.BasicLSTMCell(args.num_units, forget_bias=1.0)
            for _ in range(args.num_layers)
        ]
        controller = tf.contrib.rnn.MultiRNNCell(lstm_layers)
        ntm = NTMCell(controller, args.num_memory_locations,
                      args.memory_size, args.num_read_heads,
                      args.num_write_heads,
                      shift_range=args.conv_shift_range,
                      output_dim=5,
                      clip_value=args.clip_value)

    # Unroll over time; the cell supplies its own initial state
    # (initial_state=None, dtype given explicitly).
    sequence_out, _ = tf.nn.dynamic_rnn(cell=ntm,
                                        inputs=self.inputs,
                                        time_major=False,
                                        dtype=tf.float32,
                                        initial_state=None)

    # Only the steps after the input phase carry the model's answer.
    self.output_logits = sequence_out[:, self.max_seq_len:, :]
    self.outputs = tf.sigmoid(self.output_logits)
def _build_model(self):
    """Build the sequence model (plain LSTM stack or NTM) and task outputs.

    Reads the module-level `args` namespace for the configuration, unrolls
    the chosen cell with `tf.nn.dynamic_rnn`, and sets `self.output_logits`
    and `self.outputs` from the answer phase of the output sequence.

    Raises:
        ValueError: if `args.mann` or `args.task` is not a supported value.
            (Previously an unrecognized value fell through to a confusing
            NameError on an undefined local.)
    """
    if args.mann == 'none':
        def single_cell(num_units):
            return tf.contrib.rnn.BasicLSTMCell(num_units, forget_bias=1.0)

        # Project the top LSTM layer down to the task's bit-vector width.
        cell = tf.contrib.rnn.OutputProjectionWrapper(
            tf.contrib.rnn.MultiRNNCell(
                [single_cell(args.num_units) for _ in range(args.num_layers)]),
            args.num_bits_per_vector,
            activation=None)

        # Learned (rather than zero) initial LSTM state, tiled across the batch.
        initial_state = tuple(
            tf.contrib.rnn.LSTMStateTuple(
                c=expand(tf.tanh(learned_init(args.num_units)),
                         dim=0, N=args.batch_size),
                h=expand(tf.tanh(learned_init(args.num_units)),
                         dim=0, N=args.batch_size))
            for _ in range(args.num_layers))
    elif args.mann == 'ntm':
        cell = NTMCell(args.num_layers, args.num_units,
                       args.num_memory_locations, args.memory_size,
                       args.num_read_heads, args.num_write_heads,
                       addressing_mode='content_and_location',
                       shift_range=args.conv_shift_range,
                       reuse=False,
                       output_dim=args.num_bits_per_vector,
                       clip_value=args.clip_value,
                       init_mode=args.init_mode)
        initial_state = cell.zero_state(args.batch_size, tf.float32)
    else:
        raise ValueError('Unsupported args.mann: %r' % args.mann)

    output_sequence, _ = tf.nn.dynamic_rnn(cell=cell,
                                           inputs=self.inputs,
                                           time_major=False,
                                           initial_state=initial_state)

    # Slice off the input/prompt phase; only the answer phase is scored.
    # Offsets depend on how each task lays out its input sequence.
    if args.task == 'copy':
        self.output_logits = output_sequence[:, self.max_seq_len + 1:, :]
    elif args.task == 'associative_recall':
        self.output_logits = output_sequence[:, 3 * (self.max_seq_len + 1) + 2:, :]
    else:
        raise ValueError('Unsupported args.task: %r' % args.task)

    # Both supported tasks use per-bit sigmoid outputs.
    self.outputs = tf.sigmoid(self.output_logits)
def _build_model(self):
    # Model definition: single-layer NTM controller (100 units) with a
    # 128-slot external memory and one read head / one write head.
    cell = NTMCell(
        controller_layers=1,
        controller_units=100,
        memory_size=128,
        memory_vector_dim=20,  # width of each memory slot
        read_head_num=1,
        write_head_num=1,  # number of read/write heads
        addressing_mode='content_and_location',
        shift_range=1,
        reuse=False,
        output_dim=8,
        clip_value=20,
        init_mode='constant')
    # Run with tf.nn.dynamic_rnn.
    # Input is (batch, seq_len, dim); output is (batch, seq_len, num_units).
    output_sequence, _ = tf.nn.dynamic_rnn(cell=cell,
                                           inputs=self.inputs,
                                           time_major=False,
                                           dtype=tf.float32,
                                           initial_state=None)
    # Keep only the steps after position max_seq_len + 1 — presumably the
    # input phase plus one delimiter step; confirm against the task
    # generator's sequence layout.
    self.output_logits = output_sequence[:, self.max_seq_len + 1:, :]
    # NOTE(review): sibling implementations name this attribute
    # `self.outputs`; here it is `self.output` — verify callers before
    # renaming for consistency.
    self.output = tf.sigmoid(self.output_logits)
def _build_encoder_cell(self, hparams, num_layers, num_residual_layers,
                        base_gpu=0):
    """Build a multi-layer RNN cell that can be used by encoder.

    For `hparams.model == 'model3'` the encoder itself is memory-augmented
    (NTM or DNC); every other model uses the shared `model_helper` stack.
    """
    if hparams.model != 'model3':
        # Plain encoder: delegate stack construction to the shared helper.
        return model_helper.create_rnn_cell(
            unit_type=hparams.unit_type,
            num_units=hparams.num_units,
            num_layers=num_layers,
            num_residual_layers=num_residual_layers,
            forget_bias=hparams.forget_bias,
            dropout=hparams.dropout,
            num_gpus=hparams.num_gpus,
            mode=self.mode,
            base_gpu=base_gpu,
            single_cell_fn=self.single_cell_fn,
            num_proj=None)

    if hparams.mann == 'ntm':
        # model3 + NTM: external memory only, attention memory disabled.
        return NTMCell(hparams.num_layers, hparams.num_units,
                       use_att_memory=False,
                       att_memory=False,
                       att_memory_size=None,
                       att_memory_vector_dim=None,
                       use_ext_memory=True,
                       ext_memory_size=hparams.num_memory_locations,
                       ext_memory_vector_dim=hparams.memory_unit_size,
                       ext_read_head_num=hparams.read_heads,
                       ext_write_head_num=hparams.write_heads,
                       dropout=hparams.dropout,
                       batch_size=hparams.batch_size,
                       mode=self.mode,
                       shift_range=1,
                       output_dim=hparams.num_units,
                       reuse=False,
                       record_w_history=hparams.record_w_history)

    if hparams.mann == 'dnc':
        memory_access_cfg = {
            'memory_size': hparams.num_memory_locations,
            'word_size': hparams.memory_unit_size,
            'num_reads': hparams.read_heads,
            'num_writes': hparams.write_heads
        }
        dnc_controller_cfg = {
            'num_units': hparams.num_units,
            'num_layers': hparams.num_layers
        }
        return DNC(memory_access_cfg, dnc_controller_cfg, hparams.num_units,
                   20, hparams.dropout, self.mode, hparams.batch_size)

    # NOTE(review): model3 with an unrecognized `mann` falls through and
    # returns None — this matches the original behavior.
def __init__(self):
    """Wrap an NTM cell in a Keras RNN layer; optionally restore weights."""
    super().__init__()
    cell_kwargs = dict(
        controller_layers=CONTROLLER_NUM_LAYERS,
        controller_units=CONTROLLER_NUM_UNITS_PER_LAYER,
        memory_size=128,
        memory_vector_dim=20,
        read_head_num=1,
        write_head_num=1,
        addressing_mode='content_and_location',
        shift_range=1,
        output_dim=NUM_BITS_PER_VECTOR,
        clip_value=20,
        init_mode='constant',
    )
    # NOTE(review): unroll=True statically unrolls the RNN — presumably
    # required by this cell implementation; confirm before changing.
    self.rnn = keras.layers.RNN(cell=NTMCell(**cell_kwargs),
                                return_sequences=True,
                                return_state=False,
                                stateful=False,
                                unroll=True)
    if MODEL_LOAD_PATH is not None:
        self.load_weights(MODEL_LOAD_PATH)
def _build_model(self):
    """Build the controller (LSTM / NTM / DNC), unroll it, and map the
    trailing outputs to task predictions.

    Sets `self.output_logits` (answer-phase slice of the unrolled output)
    and `self.outputs` (task-appropriate activation of those logits).

    Raises:
        ValueError: if `args.mann` or `args.task` is not a recognized value.
            (Previously an unrecognized value fell through to a NameError
            on an undefined local such as `cell` or `output_logits`.)
    """
    if args.mann == 'none':
        def single_cell(num_units):
            return tf.contrib.rnn.BasicLSTMCell(num_units, forget_bias=1.0)

        # Project the top LSTM layer down to the task's bit-vector width.
        cell = tf.contrib.rnn.OutputProjectionWrapper(
            tf.contrib.rnn.MultiRNNCell(
                [single_cell(args.num_units) for _ in range(args.num_layers)]),
            args.num_bits_per_vector,
            activation=None)

        # Learned initial LSTM state, broadcast over the batch dimension.
        initial_state = tuple(
            tf.contrib.rnn.LSTMStateTuple(
                c=expand(tf.tanh(learned_init(args.num_units)),
                         dim=0, N=args.batch_size),
                h=expand(tf.tanh(learned_init(args.num_units)),
                         dim=0, N=args.batch_size))
            for _ in range(args.num_layers))
    elif args.mann == 'ntm':
        cell = NTMCell(args.num_layers, args.num_units,
                       args.num_memory_locations, args.memory_size,
                       args.num_read_heads, args.num_write_heads,
                       addressing_mode='content_and_location',
                       shift_range=args.conv_shift_range,
                       reuse=False,
                       output_dim=args.num_bits_per_vector,
                       clip_value=args.clip_value,
                       init_mode=args.init_mode)
        initial_state = cell.zero_state(args.batch_size, tf.float32)
    elif args.mann == 'dnc':
        access_config = {
            'memory_size': args.num_memory_locations,
            'word_size': args.memory_size,
            'num_reads': args.num_read_heads,
            'num_writes': args.num_write_heads,
        }
        controller_config = {
            'hidden_size': args.num_units,
        }
        cell = DNC(access_config, controller_config,
                   args.num_bits_per_vector, args.clip_value)
        initial_state = cell.initial_state(args.batch_size)
    else:
        raise ValueError('Unsupported args.mann: %r' % args.mann)

    output_sequence, _ = tf.nn.dynamic_rnn(cell=cell,
                                           inputs=self.inputs,
                                           time_major=False,
                                           initial_state=initial_state)

    # Keep only the answer phase of the unrolled output; the offset depends
    # on how each task lays out its input/prompt sequence.
    if args.task in ('copy', 'repeat_copy'):
        self.output_logits = output_sequence[:, self.max_seq_len + 1:, :]
    elif args.task == 'associative_recall':
        self.output_logits = output_sequence[:, 3 * (self.max_seq_len + 1) + 2:, :]
    elif args.task in ('traversal', 'shortest_path'):
        self.output_logits = output_sequence[:, -self.max_seq_len:, :]
    else:
        raise ValueError('Unsupported args.task: %r' % args.task)

    if args.task in ('copy', 'repeat_copy', 'associative_recall'):
        # Bit-vector tasks: independent per-bit sigmoids.
        self.outputs = tf.sigmoid(self.output_logits)
    else:
        # traversal / shortest_path: split the feature axis into 9 groups
        # and softmax-normalize each group independently.
        output_logits_split = tf.split(self.output_logits, 9, axis=2)
        self.outputs = tf.concat(
            [tf.nn.softmax(logits) for logits in output_logits_split], axis=2)
def _build_model(self):
    """Build the LSTM or NTM model and derive task-specific outputs.

    Sets `self.output_logits` (answer-phase slice of the unrolled output)
    and `self.outputs` (task-specific activation of those logits).

    Raises:
        ValueError: if `args.mann` is not 'none' or 'ntm'. (Previously an
            unrecognized value fell through to a NameError on `cell`.)
        UnknownTaskError: if `args.task` has no registered output offset
            or output activation.
    """
    if args.mann == 'none':
        def single_cell(num_units):
            return tf.contrib.rnn.BasicLSTMCell(num_units, forget_bias=1.0)

        # Project the top LSTM layer down to the task's bit-vector width.
        cell = tf.contrib.rnn.OutputProjectionWrapper(
            tf.contrib.rnn.MultiRNNCell(
                [single_cell(args.num_units) for _ in range(args.num_layers)]),
            args.num_bits_per_vector,
            activation=None)

        # Learned initial LSTM state, tiled over the batch.
        initial_state = tuple(
            tf.contrib.rnn.LSTMStateTuple(
                c=expand(tf.tanh(learned_init(args.num_units)),
                         dim=0, N=args.batch_size),
                h=expand(tf.tanh(learned_init(args.num_units)),
                         dim=0, N=args.batch_size))
            for _ in range(args.num_layers))
    elif args.mann == 'ntm':
        if args.use_local_impl:
            cell = NTMCell(controller_layers=args.num_layers,
                           controller_units=args.num_units,
                           memory_size=args.num_memory_locations,
                           memory_vector_dim=args.memory_size,
                           read_head_num=args.num_read_heads,
                           write_head_num=args.num_write_heads,
                           addressing_mode='content_and_location',
                           shift_range=args.conv_shift_range,
                           reuse=False,
                           output_dim=args.num_bits_per_vector,
                           clip_value=args.clip_value,
                           init_mode=args.init_mode)
        else:
            # Reference implementation needs an external LSTM controller.
            def single_cell(num_units):
                return tf.compat.v1.nn.rnn_cell.BasicLSTMCell(
                    num_units, forget_bias=1.0)

            controller = tf.compat.v1.nn.rnn_cell.MultiRNNCell(
                [single_cell(args.num_units) for _ in range(args.num_layers)])
            cell = NTMCell(controller, args.num_memory_locations,
                           args.memory_size, args.num_read_heads,
                           args.num_write_heads,
                           shift_range=args.conv_shift_range,
                           output_dim=args.num_bits_per_vector,
                           clip_value=args.clip_value)
    else:
        raise ValueError('Unsupported args.mann: %r' % args.mann)

    # Only the LSTM path supplies a custom initial state; the NTM cells
    # initialize themselves (initial_state=None, dtype given explicitly).
    output_sequence, _ = tf.compat.v1.nn.dynamic_rnn(
        cell=cell,
        inputs=self.inputs,
        time_major=False,
        dtype=tf.float32,
        initial_state=initial_state if args.mann == 'none' else None)

    # Intentionally a map, so that each new task added to the library fails
    # explicitly here with a clear message rather than later in training
    # with a strange error.
    task_to_offset = {
        CopyTask.name: lambda: CopyTask.offset(self.max_seq_len),
        AssociativeRecallTask.name:
            lambda: AssociativeRecallTask.offset(self.max_seq_len),
        SumTask.name: lambda: SumTask.offset(self.max_seq_len),
        AverageSumTask.name:
            lambda: AverageSumTask.offset(self.max_seq_len, args.num_experts),
        MTATask.name: lambda: MTATask.offset(
            self.max_seq_len, args.num_experts,
            args.two_tuple_weight_precision, args.two_tuple_alpha_precision)
    }
    try:
        offset_fn = task_to_offset[args.task]
    except KeyError:
        raise UnknownTaskError(
            f'No information on output slicing of model for "{args.task}" task'
        )
    # Call outside the `try` so a KeyError raised *inside* the offset
    # computation is not misreported as an unknown task.
    where_output_begins = offset_fn()
    self.output_logits = output_sequence[:, where_output_begins:, :]

    task_to_activation = {
        CopyTask.name: tf.sigmoid,
        AssociativeRecallTask.name: tf.sigmoid,
        SumTask.name: tf.sigmoid,
        AverageSumTask.name: tf.sigmoid,
        MTATask.name: tf.sigmoid,
    }
    try:
        activation_fn = task_to_activation[args.task]
    except KeyError:
        raise UnknownTaskError(
            f'No information on activation on model outputs for "{args.task}" task'
        )
    self.outputs = activation_fn(self.output_logits)
def _build_decoder_cell(self, hparams, encoder_outputs, encoder_state,
                        source_sequence_length):
    """Build a RNN cell with attention mechanism that can be used by decoder.

    Returns a `(cell, decoder_initial_state)` pair. For NTM models
    ('model0'/'model1'/'model2') the encoder outputs are projected into the
    NTM cell's attention memory; otherwise a standard AttentionWrapper
    stack is built via `model_helper.create_rnn_cell`.
    """
    attention_option = hparams.attention
    attention_architecture = hparams.attention_architecture
    if attention_architecture != "standard":
        raise ValueError("Unknown attention architecture %s" %
                         attention_architecture)

    num_units = hparams.num_units
    num_layers = hparams.num_layers
    num_residual_layers = hparams.num_residual_layers
    num_gpus = hparams.num_gpus
    beam_width = hparams.beam_width
    dtype = tf.float32

    # Attention memory must be batch-major; transpose if the model runs
    # time-major.
    if self.time_major:
        memory = tf.transpose(encoder_outputs, [1, 0, 2])
    else:
        memory = encoder_outputs

    # During beam-search inference, replicate memory/lengths/state once per
    # beam so the effective batch becomes batch_size * beam_width.
    if self.mode == tf.contrib.learn.ModeKeys.INFER and beam_width > 0:
        memory = tf.contrib.seq2seq.tile_batch(memory, multiplier=beam_width)
        source_sequence_length = tf.contrib.seq2seq.tile_batch(
            source_sequence_length, multiplier=beam_width)
        encoder_state = tf.contrib.seq2seq.tile_batch(
            encoder_state, multiplier=beam_width)
        batch_size = self.batch_size * beam_width
    else:
        batch_size = self.batch_size

    if hparams.model in ('model0', 'model1', 'model2'):
        # NTM decoder: project encoder outputs to num_units width and use
        # them as the cell's attention memory. model2 additionally enables
        # the external memory (extra ext_* parameters below).
        att_memory = tf.contrib.layers.fully_connected(
            memory, num_units, activation_fn=None,
            weights_initializer=tf.random_uniform_initializer(-0.1, 0.1))
        cell = NTMCell(
            num_layers, num_units,
            use_att_memory=True,
            att_memory=att_memory,
            att_memory_size=hparams.src_max_len,
            att_memory_vector_dim=num_units,
            use_ext_memory=(hparams.model == 'model2'),
            ext_memory_size=hparams.num_memory_locations
            if hparams.model == 'model2' else None,
            ext_memory_vector_dim=hparams.memory_unit_size
            if hparams.model == 'model2' else None,
            ext_read_head_num=hparams.read_heads
            if hparams.model == 'model2' else None,
            ext_write_head_num=hparams.write_heads
            if hparams.model == 'model2' else None,
            dropout=hparams.dropout,
            batch_size=batch_size,
            mode=self.mode,
            output_dim=num_units,
            # model0 uses pure content addressing; model1/model2 add
            # location-based addressing.
            addressing_mode='content'
            if hparams.model == 'model0' else 'content_and_location')
        decoder_initial_state = cell.zero_state(batch_size, dtype)
        if hparams.pass_hidden_state:
            # Replace the first state component with the encoder's final
            # state; the remaining components keep their zero-state values.
            decoder_initial_state = tuple(
                [encoder_state] + list(decoder_initial_state[1:]))
    else:
        attention_mechanism = create_attention_mechanism(
            attention_option, num_units, memory, source_sequence_length)
        cell = model_helper.create_rnn_cell(
            unit_type=hparams.unit_type,
            num_units=num_units,
            num_layers=num_layers,
            num_residual_layers=num_residual_layers,
            forget_bias=hparams.forget_bias,
            dropout=hparams.dropout,
            num_gpus=num_gpus,
            mode=self.mode,
            single_cell_fn=self.single_cell_fn,
            num_proj=None,
            num_cells=2 if (hparams.encoder_type == "bi") else 1)
        # Only generate alignment in greedy INFER mode.
        alignment_history = (self.mode == tf.contrib.learn.ModeKeys.INFER
                             and beam_width == 0)
        cell = tf.contrib.seq2seq.AttentionWrapper(
            cell,
            attention_mechanism,
            attention_layer_size=num_units,
            alignment_history=alignment_history,
            name="attention")
        # TODO(thangluong): do we need num_layers, num_gpus?
        cell = tf.contrib.rnn.DeviceWrapper(
            cell, model_helper.get_device_str(num_layers - 1, num_gpus))
        if hparams.pass_hidden_state:
            # Seed the wrapper's cell_state with the encoder's final state.
            decoder_initial_state = cell.zero_state(
                batch_size, dtype).clone(cell_state=encoder_state)
        else:
            decoder_initial_state = cell.zero_state(batch_size, dtype)
    return cell, decoder_initial_state