def basic_lstm_model(inputs):
    print('Loading basic lstm model..')
    for i in range(self.config.rnn_numLayers):
        with tf.variable_scope('rnnLayer' + str(i)):
            lstm_cell = rnn_cell.BasicLSTMCell(self.config.hidden_size)
            outputs, _ = tf.nn.dynamic_rnn(
                lstm_cell, inputs, self.ph_seqLen,      # inputs: shape(b_sz, tstp, h_sz)
                dtype=tf.float32, swap_memory=True,
                scope='basic_lstm_model_layer-' + str(i))
            inputs = outputs                            # shape(b_sz, tstp, h_sz)
    mask = mkMask(self.ph_seqLen, tstp)                 # shape(b_sz, tstp)
    mask = tf.expand_dims(mask, dim=2)                  # shape(b_sz, tstp, 1)
    aggregate_state = reduce_avg(outputs, mask,
                                 tf.expand_dims(self.ph_seqLen, 1),
                                 dim=-2)                # shape(b_sz, h_sz)
    inputs = aggregate_state
    inputs = tf.reshape(inputs, [-1, self.config.hidden_size])
    for i in range(self.config.fnn_numLayers):
        inputs = rnn.rnn_cell._linear(inputs, self.config.hidden_size,
                                      bias=True, scope='fnn_layer-' + str(i))
        inputs = tf.nn.tanh(inputs)
    aggregate_state = inputs
    logits = rnn.rnn_cell._linear(aggregate_state, self.config.class_num,
                                  bias=True, scope='fnn_softmax')
    return logits
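# --- Illustrative sketch, not part of the original file ---
# `mkMask` and `reduce_avg` are helper functions defined elsewhere in this
# repo.  The function below is a guess at the masked averaging they are
# assumed to perform in the aggregation step above: zero out padded
# timesteps, sum over time, then divide by the true sequence length.  The
# name `_masked_mean_sketch` and its arguments are made up for illustration.
def _masked_mean_sketch(states, seqLen):
    '''states: shape(b_sz, tstp, h_sz); seqLen: shape(b_sz), int32'''
    tstp = tf.shape(states)[1]
    mask = tf.sequence_mask(seqLen, tstp, dtype=tf.float32)            # (b_sz, tstp)
    summed = tf.reduce_sum(states * tf.expand_dims(mask, 2), axis=1)   # (b_sz, h_sz)
    length = tf.cast(tf.expand_dims(seqLen, 1), tf.float32)            # (b_sz, 1)
    return summed / tf.maximum(length, 1.0)                            # (b_sz, h_sz)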
def basic_cnn_model(inputs):
    in_channel = self.config.embed_size
    filter_sizes = self.config.filter_sizes
    out_channel = self.config.num_filters
    input = inputs
    for layer in range(self.config.cnn_numLayers):
        with tf.name_scope("conv-layer-" + str(layer)):
            conv_outputs = []
            for i, filter_size in enumerate(filter_sizes):
                with tf.variable_scope("conv-maxpool-%d" % filter_size):
                    # Convolution layer
                    filter_shape = [filter_size, in_channel, out_channel]
                    W = tf.get_variable(name='W', shape=filter_shape)
                    b = tf.get_variable(name='b', shape=[out_channel])
                    conv = tf.nn.conv1d(input, W, stride=1, padding="SAME",
                                        name="conv")    # shape(b_sz, tstp, out_channel)
                    # Apply nonlinearity
                    h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                    conv_outputs.append(h)
            input = tf.concat(2, conv_outputs)          # shape(b_sz, tstp, out_channel*len(filter_sizes))
            in_channel = out_channel * len(filter_sizes)
    # Max-pooling over time, with padded positions masked out
    # mask = tf.sequence_mask(self.ph_seqLen, tstp, dtype=tf.float32)  # shape(b_sz, tstp)
    mask = mkMask(self.ph_seqLen, tstp)                 # shape(b_sz, tstp)
    pooled = tf.reduce_max(
        input * tf.expand_dims(tf.cast(mask, dtype=tf.float32), 2),
        [1])                                            # shape(b_sz, out_channel*len(filter_sizes))
    inputs = tf.reshape(pooled, shape=[b_sz, out_channel * len(filter_sizes)])
    for i in range(self.config.fnn_numLayers):
        inputs = rnn.rnn_cell._linear(inputs, self.config.embed_size,
                                      bias=True, scope='fnn_layer-' + str(i))
        inputs = tf.nn.tanh(inputs)
    aggregate_state = inputs
    logits = rnn.rnn_cell._linear(aggregate_state, self.config.class_num,
                                  bias=True, scope='fnn_softmax')
    return logits
def masked_reverse_routing_iter(caps_uhat, seqLen, iter_num):
    '''
    Args:
        caps_uhat: shape(b_sz, tstp, out_caps_num, out_caps_dim)
        seqLen:    shape(b_sz)
        iter_num:  number of routing iterations

    Returns:
        V_ret: shape(b_sz, out_caps_num, out_caps_dim)
        S_ret: shape(b_sz, out_caps_num, out_caps_dim), pre-squash capsule sums
    '''
    assert iter_num > 0
    b_sz = tf.shape(caps_uhat)[0]
    tstp = tf.shape(caps_uhat)[1]
    out_caps_num = int(caps_uhat.get_shape()[2])

    seqLen = tf.where(tf.equal(seqLen, 0), tf.ones_like(seqLen), seqLen)
    mask = mkMask(seqLen, tstp)                             # shape(b_sz, tstp)
    mask = tf.tile(tf.expand_dims(mask, axis=-1),
                   multiples=[1, 1, out_caps_num])          # shape(b_sz, tstp, out_caps_num)
    B = tf.zeros([b_sz, tstp, out_caps_num], dtype=tf.float32)
    B = tf.where(mask, B, tf.ones_like(B) * _MIN_NUM)
    for i in range(iter_num):
        C = tf.nn.softmax(B, dim=1)                         # shape(b_sz, tstp, out_caps_num)
        C = tf.expand_dims(C, axis=-1)                      # shape(b_sz, tstp, out_caps_num, 1)
        weighted_uhat = C * caps_uhat                       # shape(b_sz, tstp, out_caps_num, out_caps_dim)
        S = tf.reduce_sum(weighted_uhat, axis=1)            # shape(b_sz, out_caps_num, out_caps_dim)
        V = _squash(S, axes=[2])                            # shape(b_sz, out_caps_num, out_caps_dim)
        V = tf.expand_dims(V, axis=1)                       # shape(b_sz, 1, out_caps_num, out_caps_dim)
        B = tf.reduce_sum(caps_uhat * V, axis=-1) + B       # shape(b_sz, tstp, out_caps_num)

    V_ret = tf.squeeze(V, axis=[1])                         # shape(b_sz, out_caps_num, out_caps_dim)
    S_ret = S
    return V_ret, S_ret
def masked_routing_iter(caps_uhat, seqLen, iter_num, caps_ihat=None, w_rr=None):
    '''
    Args:
        caps_uhat: shape(b_sz, tstp, out_caps_num, out_caps_dim)
        seqLen:    shape(b_sz)
        iter_num:  number of routing iterations
        caps_ihat, w_rr: optional; when provided, an extra term is added to
            the routing-logit update (see the else branch below)

    Returns:
        V_ret:    shape(b_sz, out_caps_num, out_caps_dim)
        S_ret:    shape(b_sz, out_caps_num, out_caps_dim), pre-squash capsule sums
        C_ret:    shape(iter_num, b_sz, tstp, out_caps_num), coupling coefficients per iteration
        B_logits: routing logits before the final update
    '''
    assert iter_num > 0
    b_sz = tf.shape(caps_uhat)[0]
    tstp = tf.shape(caps_uhat)[1]
    out_caps_num = int(caps_uhat.get_shape()[2])

    seqLen = tf.where(tf.equal(seqLen, 0), tf.ones_like(seqLen), seqLen)
    mask = mkMask(seqLen, tstp)                                           # shape(b_sz, tstp)
    floatmask = tf.cast(tf.expand_dims(mask, axis=-1), dtype=tf.float32)  # shape(b_sz, tstp, 1)
    B = tf.zeros([b_sz, tstp, out_caps_num], dtype=tf.float32)

    C_list = list()
    for i in range(iter_num):
        B_logits = B
        C = tf.nn.softmax(B, axis=2)                        # shape(b_sz, tstp, out_caps_num)
        C = tf.expand_dims(C * floatmask, axis=-1)          # shape(b_sz, tstp, out_caps_num, 1)
        weighted_uhat = C * caps_uhat                       # shape(b_sz, tstp, out_caps_num, out_caps_dim)
        C_list.append(C)
        S = tf.reduce_sum(weighted_uhat, axis=1)            # shape(b_sz, out_caps_num, out_caps_dim)
        V = _squash(S, axes=[2])                            # shape(b_sz, out_caps_num, out_caps_dim)
        V = tf.expand_dims(V, axis=1)                       # shape(b_sz, 1, out_caps_num, out_caps_dim)
        if caps_ihat is None:
            B = tf.reduce_sum(caps_uhat * V, axis=-1) + B   # shape(b_sz, tstp, out_caps_num)
        else:
            B = tf.reduce_sum(caps_uhat * V, axis=-1) + 0.1 * tf.squeeze(
                tf.matmul(
                    tf.matmul(caps_uhat,
                              tf.tile(w_rr, [tf.shape(caps_uhat)[0],
                                             tf.shape(caps_uhat)[1], 1, 1])),
                    tf.tile(caps_ihat, [1, tf.shape(caps_uhat)[1], 1, 1])),
                axis=-1) + B                                # shape(b_sz, tstp, out_caps_num)

    V_ret = tf.squeeze(V, axis=[1])                         # shape(b_sz, out_caps_num, out_caps_dim)
    S_ret = S
    C_ret = tf.squeeze(tf.stack(C_list), axis=[4])
    return V_ret, S_ret, C_ret, B_logits
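# --- Usage sketch, not part of the original file ---
# Shows how masked_routing_iter is typically wired after an encoder: each
# timestep's hidden state is projected to out_caps_num * out_caps_dim vote
# values, reshaped into caps_uhat, and then routed.  The projection layer,
# the capsule sizes, and the 3-iteration setting are assumptions for
# illustration, not values taken from this repo's config.
def _routing_usage_sketch(encoder_states, seqLen, out_caps_num=5, out_caps_dim=16):
    '''encoder_states: shape(b_sz, tstp, h_sz); seqLen: shape(b_sz)'''
    b_sz = tf.shape(encoder_states)[0]
    tstp = tf.shape(encoder_states)[1]
    votes = tf.layers.dense(encoder_states, out_caps_num * out_caps_dim,
                            name='caps_uhat_proj')          # (b_sz, tstp, num*dim)
    caps_uhat = tf.reshape(votes, [b_sz, tstp, out_caps_num, out_caps_dim])
    V, S, C, B_logits = masked_routing_iter(caps_uhat, seqLen, iter_num=3)
    return V                                                # (b_sz, out_caps_num, out_caps_dim)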
def basic_cbow_model(inputs):
    mask = mkMask(self.ph_seqLen, tstp)                 # shape(b_sz, tstp)
    mask = tf.expand_dims(mask, dim=2)                  # shape(b_sz, tstp, 1)
    aggregate_state = reduce_avg(inputs, mask,
                                 tf.expand_dims(self.ph_seqLen, 1),
                                 dim=-2)                # shape(b_sz, emb_sz)
    inputs = aggregate_state
    inputs = tf.reshape(inputs, [-1, self.config.embed_size])
    for i in range(self.config.fnn_numLayers):
        inputs = rnn.rnn_cell._linear(inputs, self.config.embed_size,
                                      bias=True, scope='fnn_layer-' + str(i))
        inputs = tf.nn.tanh(inputs)
    aggregate_state = inputs
    logits = rnn.rnn_cell._linear(aggregate_state, self.config.class_num,
                                  bias=True, scope='fnn_softmax')
    return logits
def basic_Centality(in_x, xLen, config, is_train, scope=None):
    '''
    :param in_x: shape(b_sz, xlen, h_sz)
    :param xLen: shape(b_sz)
    :return:
    '''

    def PowerEigen(matrix, max_iter, eta):
        '''
        :param matrix: shape(b_sz, dim, dim)
        :return out: shape(b_sz, dim)

        Power method without gradient; this way it won't store intermediate
        states, which saves memory.
        '''
        b_sz = tf.shape(matrix)[0]
        dim = tf.shape(matrix)[1]

        def body(time, _, y):
            '''
            :param y: shape(b_sz, dim, 1)
            :return out: shape(b_sz, dim, 1)
            '''
            v = y / (tf.sqrt(tf.reduce_sum(y**2, axis=1, keep_dims=True)) + EPSILON)
            y = tf.matmul(matrix, v)                            # shape(b_sz, dim, 1)
            theta = tf.matmul(v, y, transpose_a=True)           # shape(b_sz, 1, 1)
            stop = tf.less(tf.reduce_sum((y - theta * v)**2, axis=1),
                           eta * tf.squeeze(theta, axis=1)**2)  # shape(b_sz, 1)
            stop = tf.squeeze(stop, axis=1)                     # shape(b_sz)
            acc = tf.cast(tf.logical_not(stop), dtype=time.dtype)
            assert_inf = tf.Assert(tf.reduce_all(tf.is_finite(y)),
                                   ['y inf print v', v], summarize=100000)
            assert_nan = tf.Assert(tf.logical_not(tf.reduce_any(tf.is_nan(y))),
                                   ['y Nan print v', v], summarize=100000)
            with tf.control_dependencies([assert_inf, assert_nan]):
                y = tf.identity(y)
            return time + acc, stop, y

        def stop_cond(time, stop, _):
            '''
            :param time: shape(b_sz) int32
            :param stop: shape(b_sz,) bool
            :param _:
            :return:
            '''
            return tf.logical_and(tf.reduce_all(tf.less(time, max_iter)),
                                  tf.reduce_any(tf.logical_not(stop)))

        zero_step = tf.zeros([b_sz], dtype=tf.int32)
        stop = tf.zeros(shape=[b_sz], dtype=tf.bool)            # shape(b_sz,)
        init_eig = tf.random_normal([b_sz, dim, 1], dtype=matrix.dtype)**2 + EPSILON
        # init_eig = tf.ones([b_sz, dim, 1], dtype=matrix.dtype)
        time, stop, y = tf.while_loop(stop_cond, body=body,
                                      loop_vars=[zero_step, stop, init_eig],
                                      back_prop=False,
                                      swap_memory=True)         # y: shape(b_sz, dim, 1)
        v = y / (tf.sqrt(tf.reduce_sum(y**2, axis=1, keep_dims=True)) + EPSILON)
        eigenValue = tf.reduce_mean(tf.matmul(matrix, v) / v, axis=1)   # shape(b_sz, 1)
        tf.summary.histogram("converge-time", time)
        return time, stop, eigenValue, v                        # v: shape(b_sz, dim, 1)

    def PowerEigen_grad_step(matrix, max_iter, eigenVector):
        '''
        :param matrix: shape(b_sz, dim, dim)
        :param max_iter: gradient iteration number
        :param eigenVector: the converged eigenvector
        :return out: shape(b_sz, dim)

        Power method with gradient; a small number of steps is sufficient to
        get a good approximation of the gradient.
        '''
        b_sz = tf.shape(matrix)[0]
        dim = tf.shape(matrix)[1]

        def body(time, y):
            '''
            :param y: shape(b_sz, dim, 1)
            :return out: shape(b_sz, dim, 1)
            '''
            v = y / (tf.sqrt(tf.reduce_sum(y**2, axis=1, keep_dims=True)) + EPSILON)
            y = tf.matmul(matrix, v)                            # shape(b_sz, dim, 1)
            return time + 1, y

        def stop_cond(time, _):
            '''
            :param time: scalar int32
            :return:
            '''
            return tf.less(time, max_iter)

        zero_step = tf.constant(0, dtype=tf.int32)
        time, y = tf.while_loop(stop_cond, body=body,
                                loop_vars=[zero_step, eigenVector],
                                swap_memory=True)               # y: shape(b_sz, dim, 1)
        v = y / (tf.sqrt(tf.reduce_sum(y**2, axis=1, keep_dims=True)) + EPSILON)
        eigenValue = tf.reduce_mean(tf.matmul(matrix, v) / v, axis=1)   # shape(b_sz, 1)
        return time, eigenValue, tf.transpose(v, perm=[0, 2, 1])  # (shape(b_sz, 1), shape(b_sz, 1, dim))

    def connect_fnn(in_x):
        '''
        :param in_x: shape(b_sz, xLen, h_sz)
        :return: connectivity, shape(b_sz, xLen, xLen)
        '''
        h_sz = int(in_x.get_shape()[-1])
        units = config['units']
        left = tf.layers.dense(in_x, units=units, name='left-fnn')      # shape(b_sz, xLen, units)
        right = tf.layers.dense(in_x, units=units, name='right-fnn')    # shape(b_sz, xLen, units)
        fnn_concat_mlp = tf.nn.tanh(tf.expand_dims(left, axis=1) +
                                    tf.expand_dims(right, axis=2))      # shape(b_sz, xLen, xLen, units)
        fnn_concat_mlp = tf.layers.dense(fnn_concat_mlp, units=1,
                                         name='concat-mlp')             # shape(b_sz, xLen, xLen, 1)
        connectivity = fnn_concat_mlp
        connectivity = tf.squeeze(connectivity, axis=-1)                # shape(b_sz, xLen, xLen)
        return connectivity

    h_sz = int(in_x.get_shape()[-1])
    maxLen = tf.shape(in_x)[1]
    b_sz = tf.shape(in_x)[0]
    mask = tf.expand_dims(mkMask(xLen, maxLen), axis=2)                 # shape(b_sz, xlen, 1)
    mask = tf.logical_and(mask, tf.transpose(mask, perm=[0, 2, 1]))     # shape(b_sz, xlen, xlen)

    with tf.variable_scope(scope or 'Centrality'):
        if config['center-mode'] == 'center':
            in_x_cent = in_x - tf.reduce_mean(in_x, axis=2, keepdims=True)
        elif config['center-mode'] == 'ln':
            in_x_cent = layers.layer_norm(in_x, begin_norm_axis=-1)
        elif config['center-mode'] == 'none':
            in_x_cent = in_x
        else:
            raise ValueError('center-mode: %s' % config['center-mode'])

        connectivity = connect_fnn(in_x_cent)
        masked_connected = tf.where(mask, connectivity,
                                    tf.ones_like(connectivity) * NINF)
        masked_connected = tf.nn.softmax(masked_connected, axis=1)
        masked_connected = tf.where(mask, masked_connected,
                                    tf.zeros_like(connectivity))

        time, stop, _, eigenVec = PowerEigen(
            masked_connected,
            tf.cond(is_train, lambda: config['max-iter'], lambda: 5000),
            eta=config['power-eta'])
        eigenVec = tf.stop_gradient(eigenVec)
        # eigenVec = tf.transpose(eigenVec, perm=[0, 2, 1])
        _, eigenValue, eigenVec = PowerEigen_grad_step(
            masked_connected, max_iter=config['grad-iter'], eigenVector=eigenVec)

        attn_weights = tf.abs(eigenVec) / (
            tf.reduce_sum(tf.abs(eigenVec), axis=-1, keep_dims=True) + EPSILON)
        aggregated = tf.matmul(attn_weights, in_x_cent)     # shape(b_sz, 1, h_sz)
        aggregated = tf.squeeze(aggregated, axis=1)
        aggregated = layers.layer_norm(aggregated, begin_norm_axis=-1)
    return aggregated, time, stop
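# --- Usage sketch, not part of the original file ---
# basic_Centality reads these keys from `config`: 'units' (size of the
# left/right projections in connect_fnn), 'center-mode' ('center' | 'ln' |
# 'none'), 'max-iter' and 'power-eta' (power-iteration budget and tolerance),
# and 'grad-iter' (extra differentiable power steps).  The concrete values
# and the function name below are assumptions for illustration only.
def _centrality_usage_sketch(in_x, xLen, is_train):
    '''in_x: shape(b_sz, xlen, h_sz); xLen: shape(b_sz); is_train: scalar bool tensor'''
    config = {'units': 64, 'center-mode': 'ln', 'max-iter': 100,
              'power-eta': 1e-6, 'grad-iter': 5}
    aggregated, time, stop = basic_Centality(in_x, xLen, config, is_train,
                                             scope='centrality_sketch')
    return aggregated                                       # shape(b_sz, h_sz)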
def feed_back_lstm(inputs):

    def feed_back_net(inputs, seq_len, feed_back_steps):
        '''
        Args:
            inputs: shape(b_sz, tstp, emb_sz)
        '''
        shape_of_input = tf.shape(inputs)
        b_sz = shape_of_input[0]
        h_sz = self.config.hidden_size
        tstp = shape_of_input[1]
        emb_sz = self.config.embed_size

        def body(time, prev_output, state_ta):
            '''
            Args:
                prev_output: previous output, shape(b_sz, tstp, hidden_size)
            '''
            prev_output = tf.reshape(prev_output, shape=[-1, h_sz])     # shape(b_sz*tstp, h_sz)
            output_linear = tf.nn.rnn_cell._linear(prev_output,
                                                   output_size=h_sz,    # shape(b_sz*tstp, h_sz)
                                                   bias=False,
                                                   scope='output_transformer')
            output_linear = tf.reshape(output_linear,
                                       shape=[b_sz, tstp, h_sz])        # shape(b_sz, tstp, h_sz)
            output_linear = tf.tanh(output_linear)                      # shape(b_sz, tstp, h_sz)
            rnn_input = tf.concat(2, [output_linear, inputs],
                                  name='concat_output_input')           # shape(b_sz, tstp, h_sz+emb_sz)
            cell = tf.nn.rnn_cell.BasicLSTMCell(h_sz)
            cur_outputs, state = tf.nn.dynamic_rnn(cell, rnn_input, seq_len,
                                                   dtype=tf.float32,
                                                   swap_memory=True,
                                                   time_major=False,
                                                   scope='encoder')
            state = tf.concat(1, state)
            state_ta = state_ta.write(time, state)
            return time + 1, cur_outputs, state_ta                      # cur_outputs: shape(b_sz, tstp, h_sz)

        def condition(time, *_):
            return time < feed_back_steps

        state_ta = tf.TensorArray(dtype=tf.float32, dynamic_size=True,
                                  clear_after_read=True, size=0)
        initial_output = tf.zeros(shape=[b_sz, tstp, h_sz],
                                  dtype=inputs.dtype, name='initial_output')
        time = tf.constant(0, dtype=tf.int32)
        _, outputs, state_ta = tf.while_loop(condition, body,
                                             [time, initial_output, state_ta],
                                             swap_memory=True)
        final_state = state_ta.read(state_ta.size() - 1)
        return final_state, outputs

    _, outputs = feed_back_net(inputs, self.ph_seqLen, feed_back_steps=10)

    mask = mkMask(self.ph_seqLen, tstp)                 # shape(b_sz, tstp)
    mask = tf.expand_dims(mask, dim=2)                  # shape(b_sz, tstp, 1)
    aggregate_state = reduce_avg(outputs, mask,
                                 tf.expand_dims(self.ph_seqLen, 1),
                                 dim=-2)                # shape(b_sz, h_sz)
    inputs = aggregate_state
    inputs = tf.reshape(inputs, [-1, self.config.hidden_size])
    for i in range(self.config.fnn_numLayers):
        inputs = rnn.rnn_cell._linear(inputs, self.config.hidden_size,
                                      bias=True, scope='fnn_layer-' + str(i))
        inputs = tf.nn.tanh(inputs)
    aggregate_state = inputs
    logits = rnn.rnn_cell._linear(aggregate_state, self.config.class_num,
                                  bias=True, scope='fnn_softmax')
    return logits