# The methods below assume `import tensorflow as tf` plus the repo's `config`
# module and helper functions (get_key_feature, attention, cosine_similarity,
# attention_read, extract_neg_template) are in scope.
def __call__(self, inputs, prev_state, scope=None):
    prev_controller_state = prev_state.controller_state
    prev_access_state = prev_state.access_state

    search_feature = inputs[0]
    memory_for_writing = inputs[1]

    # Get the LSTM controller input: a key feature extracted from the search region.
    controller_input = get_key_feature(search_feature, self._is_train, 'search_key')

    # Attend over the search key using the previous hidden state, then run the controller.
    attention_input, self.att_score = attention(controller_input,
                                                prev_controller_state[1], scope)
    controller_output, controller_state = self._controller(
        attention_input, prev_controller_state, scope)

    # Read from / write to the external memory, conditioned on the controller output.
    access_inputs = (memory_for_writing, controller_output)
    access_output, access_state = self._memory_access(
        access_inputs, prev_access_state, scope)

    return access_output, MemNetState(access_state=access_state,
                                      controller_state=controller_state)
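# A hedged sketch (not the repo's code) of what the `attention` helper above
# plausibly does: soft attention that pools the spatial cells of the search key
# feature with weights conditioned on the previous LSTM hidden state h_prev.
# The signature, shapes, and layer names here are assumptions; variable-scope
# reuse across timesteps is omitted for brevity.
def attention(key_feature, h_prev, scope):
    # Assumed shapes: key_feature [batch, H, W, C], h_prev [batch, hidden_size].
    batch, height, width, channels = key_feature.get_shape().as_list()
    flat = tf.reshape(key_feature, [batch, height * width, channels])
    # Score each spatial cell against a projection of the previous hidden state.
    h_proj = tf.layers.dense(h_prev, channels, name='att_h_proj')      # [batch, C]
    scores = tf.reduce_sum(flat * tf.expand_dims(h_proj, 1), axis=-1)  # [batch, H*W]
    att_score = tf.nn.softmax(scores, axis=-1)
    # The attention-weighted average of the cells becomes the controller input.
    attended = tf.reduce_sum(flat * tf.expand_dims(att_score, -1), axis=1)
    return attended, att_score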
def initial_state(self, init_feature):
    # Key feature of the first-frame target template; squeeze the spatial dims.
    init_key = tf.squeeze(
        get_key_feature(init_feature, self._is_train, 'init_memory_key'), [1, 2])

    # Initialize the LSTM controller state from the initial key.
    c_state = tf.layers.dense(init_key, self._hidden_size,
                              activation=tf.nn.tanh, name='c_state')
    h_state = tf.layers.dense(init_key, self._hidden_size,
                              activation=tf.nn.tanh, name='h_state')
    batch_size = init_key.get_shape().as_list()[0]
    controller_state = tf.nn.rnn_cell.LSTMStateTuple(c_state, h_state)

    # Positive memory: the first write goes to slot 0; nothing has been read yet.
    write_weights = tf.one_hot([0] * batch_size, self._memory_size_pos,
                               axis=-1, dtype=tf.float32)
    read_weight = tf.zeros([batch_size, self._memory_size_pos], tf.float32)
    control_factors = tf.one_hot([2] * batch_size, 3, axis=-1, dtype=tf.float32)
    write_decay = tf.zeros([batch_size, 1], tf.float32)
    usage = tf.one_hot([0] * batch_size, self._memory_size_pos,
                       axis=-1, dtype=tf.float32)
    memory = tf.zeros([batch_size, self._memory_size_pos] + self._slot_size,
                      tf.float32)
    access_state_pos = AccessStatePos(init_memory=init_feature,
                                      memory=memory,
                                      read_weight=read_weight,
                                      write_weight=write_weights,
                                      control_factors=control_factors,
                                      write_decay=write_decay,
                                      usage=usage)

    # Negative (distractor) memory starts empty.
    memory_neg = tf.zeros([batch_size, self._memory_size_neg] + self._slot_size,
                          tf.float32)
    write_weights_neg = tf.zeros(
        [batch_size, config.neg_num_write, self._memory_size_neg], tf.float32)
    usage_neg = tf.zeros([batch_size, self._memory_size_neg], tf.float32)
    access_state_neg = AccessStateNeg(memory=memory_neg,
                                      write_weight=write_weights_neg,
                                      usage=usage_neg)

    neg_template = tf.zeros([batch_size, config.neg_num_write] + self._slot_size,
                            tf.float32)
    neg_idx = tf.zeros([batch_size, config.neg_num_write], tf.int32)
    return MemNetState(controller_state=controller_state,
                       access_state_pos=access_state_pos,
                       access_state_neg=access_state_neg,
                       neg_template=neg_template,
                       neg_idx=neg_idx)
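# A minimal sketch of the state containers referenced above, assuming they are
# plain collections.namedtuple definitions; the field names are taken directly
# from the constructor calls in initial_state, but where they actually live in
# the repo is an assumption. Note that the single-memory variant in the first
# __call__ uses its own two-field MemNetState(access_state, controller_state)
# from a separate module.
import collections

MemNetState = collections.namedtuple(
    'MemNetState',
    ['controller_state', 'access_state_pos', 'access_state_neg',
     'neg_template', 'neg_idx'])
AccessStatePos = collections.namedtuple(
    'AccessStatePos',
    ['init_memory', 'memory', 'read_weight', 'write_weight',
     'control_factors', 'write_decay', 'usage'])
AccessStateNeg = collections.namedtuple(
    'AccessStateNeg', ['memory', 'write_weight', 'usage'])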
def _read_weights(self, read_key, read_strength, memory):
    # Key feature of every memory slot; squeeze the spatial dims.
    memory_key = tf.squeeze(
        get_key_feature(memory, self._is_train, 'memory_key'), [2, 3])
    if config.use_attention_read:
        return attention_read(read_key, memory_key)
    else:
        return cosine_similarity(memory_key, read_key, read_strength)
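# A hedged sketch of the cosine-similarity branch above (standard NTM-style
# content addressing): score each slot key against the read key, sharpen by the
# learned read strength, and normalize into read weights. The exact shapes and
# epsilon handling in the repo's helper are assumptions.
def cosine_similarity(memory_key, read_key, read_strength, eps=1e-8):
    # Assumed shapes: memory_key [batch, memory_size, key_dim],
    # read_key [batch, key_dim], read_strength [batch, 1].
    read_key = tf.expand_dims(read_key, 1)                # [batch, 1, key_dim]
    dot = tf.reduce_sum(memory_key * read_key, axis=-1)   # [batch, memory_size]
    norm = tf.norm(memory_key, axis=-1) * tf.norm(read_key, axis=-1) + eps
    # Sharpen with the read strength, then normalize to a distribution over slots.
    return tf.nn.softmax(read_strength * dot / norm, axis=-1)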
def __call__(self, inputs, prev_state, scope=None):
    prev_controller_state = prev_state.controller_state
    prev_access_state_pos = prev_state.access_state_pos
    prev_access_state_neg = prev_state.access_state_neg

    search_feature = inputs[0]
    memory_for_writing = inputs[1]

    # Get the LSTM controller input: a key feature extracted from the search region.
    controller_input = get_key_feature(search_feature, self._is_train, 'search_key')
    attention_input, self.att_score = attention(controller_input,
                                                prev_controller_state[1], scope)
    controller_output, controller_state = self._controller(
        attention_input, prev_controller_state, scope)

    # Read/write the positive (target) memory.
    pos_access_inputs = (memory_for_writing, controller_output)
    access_output_pos, access_state_pos = self._memory_access_pos(
        pos_access_inputs, prev_access_state_pos, scope)

    # Read/write the negative (distractor) memory with the templates
    # extracted at the previous step.
    neg_access_inputs = (prev_state.neg_template, controller_output)
    access_output_neg, access_state_neg = self._memory_access_neg(
        neg_access_inputs, prev_access_state_neg, scope)

    # Gate how much of the negative read is subtracted from the positive read.
    trans_pos = tf.layers.conv2d(access_output_pos, config.slot_size[2], [1, 1],
                                 use_bias=False, name='trans_pos')
    trans_neg = tf.layers.conv2d(access_output_neg, config.slot_size[2], [1, 1],
                                 name='trans_neg')
    adap_gates = tf.sigmoid(
        tf.layers.conv2d(tf.nn.tanh(trans_pos + trans_neg), config.slot_size[2],
                         config.slot_size[0:2], use_bias=False,
                         name='adap_gates'))
    access_output = access_output_pos - adap_gates * access_output_neg

    if int(scope) < config.summary_display_step:
        tf.summary.histogram('adap_gates/{}'.format(scope), adap_gates)

    # Cross-correlate the final template with the search feature, one example
    # at a time, to obtain the response map.
    batch_size = search_feature.get_shape().as_list()[0]
    response = tf.map_fn(
        lambda x: tf.nn.conv2d(tf.expand_dims(x[0], 0),
                               tf.expand_dims(x[1], 3),
                               [1, 1, 1, 1], 'VALID'),
        elems=[search_feature, access_output],
        dtype=tf.float32,
        parallel_iterations=batch_size)
    response = tf.squeeze(tf.squeeze(response, 1), -1)

    # Mine distractor templates around the response peaks for the next step.
    neg_template, neg_idx = tf.py_func(
        extract_neg_template, [response, search_feature, config.neg_num_write],
        [tf.float32, tf.int32])
    neg_template.set_shape([batch_size, config.neg_num_write] + config.slot_size)

    return access_output, MemNetState(access_state_pos=access_state_pos,
                                      access_state_neg=access_state_neg,
                                      controller_state=controller_state,
                                      neg_template=neg_template,
                                      neg_idx=neg_idx)
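# A hedged usage sketch (not from the repo) of how this cell is typically
# driven over a video: `cell`, `init_feature`, `search_features`,
# `write_features`, and `num_steps` are hypothetical names. The per-step scope
# is the timestep as a string, matching the int(scope) check above.
state = cell.initial_state(init_feature)            # first-frame template state
templates = []
for t in range(num_steps):
    template, state = cell((search_features[t], write_features[t]),
                           state, scope=str(t))
    templates.append(template)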