def SelfAttention(self, att_num=None, is_fc=False, residual=False):
    assert is_fc or att_num, 'must state attention feature num for conv'

    def flatten_hw(layer):
        # [N, H, W, C] -> [N, H*W, C] so attention runs over spatial positions
        shape = layer.get_shape().as_list()
        layer = tf.reshape(layer, [-1, shape[1] * shape[2], shape[3]])
        return layer

    with tf.variable_scope('att_' + str(self.layernum)):
        current = self.result
        current_shape = current.get_shape().as_list()
        orig_num = current_shape[-1]
        if is_fc:
            # project with fully connected layers, then expand to rank 3 so
            # the matmuls below treat each feature as one attention position
            f = L.Fcnn(current, orig_num, 'att_fc_f' + str(self.layernum))
            g = L.Fcnn(current, orig_num, 'att_fc_g' + str(self.layernum))
            h = L.Fcnn(current, orig_num, 'att_fc_h' + str(self.layernum))
            f = tf.expand_dims(f, axis=-1)
            g = tf.expand_dims(g, axis=-1)
            h = tf.expand_dims(h, axis=-1)
        else:
            # project with 1x1 convolutions, then flatten H and W
            # (conv path only; the fc tensors above are already rank 3)
            f = L.conv2D(current, 1, att_num, 'att_conv_f_' + str(self.layernum))
            g = L.conv2D(current, 1, att_num, 'att_conv_g_' + str(self.layernum))
            h = L.conv2D(current, 1, orig_num, 'att_conv_h_' + str(self.layernum))
            f = flatten_hw(f)
            g = flatten_hw(g)
            h = flatten_hw(h)
        # attention map: softmax(f g^T)
        fg = tf.matmul(f, g, transpose_b=True)
        fg = tf.nn.softmax(fg, -1)
        # out = scale * (softmax(f g^T) h) + x; scale starts at 0 so the block
        # is initially an identity when the residual path is enabled
        scale = tf.get_variable('Variable', shape=[], initializer=tf.constant_initializer(0.0))
        out = tf.matmul(fg, h)
        # the flattened source created `scale` but never applied it; applying
        # it here matches the formula in the comment above
        out = scale * out
        if is_fc:
            out = tf.reshape(out, [-1, orig_num])
        else:
            out = tf.reshape(out, [-1] + current_shape[1:3] + [orig_num])
        if residual:
            out = out + current
        self.layernum += 1
        self.inpsize = out.get_shape().as_list()
        self.result = out
    return self.result
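# Usage sketch (not from the source): SelfAttention appears to be a method of a
# network-builder class whose `self.result` holds the tensor in flight, in the
# style of SAGAN-type self-attention. Assuming a hypothetical builder instance
# `net` whose current result is a 4-D feature map:
#
#   net.SelfAttention(att_num=32)                  # 1x1-conv f/g/h, attention over H*W positions
#   net.SelfAttention(is_fc=True, residual=True)   # fc f/g/h on a 2-D tensor, with skip connection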
def LSTM(inp_holder, hidden_holder, state_holder, outdim, name, reuse=False):
    with tf.variable_scope(name, reuse=reuse):
        # gate pre-activations from the concatenated [input, hidden] vector
        inp = tf.concat([inp_holder, hidden_holder], -1)
        inpdim = inp.get_shape().as_list()[-1]
        # info (input gate and candidate)
        I1 = L.Fcnn(inp, inpdim, outdim, name='Info_1')
        I2 = L.Fcnn(inp, inpdim, outdim, name='Info_2')
        # forget gate
        F = L.Fcnn(inp, inpdim, outdim, name='Forget')
        # output gate
        O = L.Fcnn(inp, inpdim, outdim, name='Output')
        # additional hidden-to-hidden terms added into each gate
        I1_h = L.Fcnn(hidden_holder, outdim, outdim, name='Info_1_hid')
        I2_h = L.Fcnn(hidden_holder, outdim, outdim, name='Info_2_hid')
        F_h = L.Fcnn(hidden_holder, outdim, outdim, name='Forget_hid')
        O_h = L.Fcnn(hidden_holder, outdim, outdim, name='Output_hid')
        I = tf.sigmoid(I1 + I1_h) * tf.tanh(I2 + I2_h)  # gated candidate
        F = tf.sigmoid(F + F_h)                         # forget gate
        C_next = F * state_holder + I                   # new cell state
        O = tf.sigmoid(O + O_h)                         # output gate
        H = O * tf.tanh(C_next)                         # new hidden state
    return H, C_next
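# Unrolling sketch (not from the source; TF 1.x graph mode assumed, dimensions
# illustrative). Reusing the same `name` with reuse=True shares the gate
# weights across time steps:
#
#   x_t = tf.placeholder(tf.float32, [None, 64])    # input at step t
#   h_0 = tf.placeholder(tf.float32, [None, 128])   # initial hidden state
#   c_0 = tf.placeholder(tf.float32, [None, 128])   # initial cell state
#   h_1, c_1 = LSTM(x_t, h_0, c_0, 128, 'lstm0')
#   h_2, c_2 = LSTM(x_t, h_1, c_1, 128, 'lstm0', reuse=True)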
def fcLayer(self, outsize, activation=-1, nobias=False, batch_norm=False):
    with tf.variable_scope('fc_' + str(self.layernum)):
        self.result = L.Fcnn(self.result, self.inpsize[1], outsize, 'fc_' + str(self.layernum), nobias=nobias)
        if batch_norm:
            self.result = L.batch_norm(self.result, 'batch_norm_' + str(self.layernum), training=self.bntraining, epsilon=self.epsilon)
        self.inpsize[1] = outsize
        self.activate(activation)
        self.layernum += 1
    return self.result
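# Usage sketch (not from the source; `net` is a hypothetical builder instance,
# and the activation argument is assumed to be one of the library's integer
# activation codes, with the default -1 meaning "no activation"):
#
#   net.fcLayer(512, activation=0, batch_norm=True)   # hidden layer
#   net.fcLayer(10)                                    # linear output layer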
def fcLayer(self, outsize, activation=-1, nobias=False, batch_norm=False):
    with tf.variable_scope('fc_' + str(self.layernum)):
        self.result = L.Fcnn(self.result, self.inpsize[1], outsize, 'fc_' + str(self.layernum), nobias=nobias)
        # if the most recently recorded fc layer is the last one in the
        # variable list, its stored transfer shape must track the new outsize
        if len(self.fcs) != 0:
            if self.fcs[-1] == len(self.varlist):
                self.transShape[-1] = outsize
        # refresh the snapshot of trainable variables after adding this layer
        self.varlist = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        if batch_norm:
            self.result = L.batch_norm(self.result, 'batch_norm_' + str(self.layernum))
        self.inpsize[1] = outsize
        self.activate(activation)
        self.layernum += 1
    return [self.result, list(self.inpsize)]
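# Usage sketch (not from the source): unlike the variant above, this fcLayer
# tracks fc-layer variable offsets (`self.fcs` / `self.transShape`, presumably
# for later weight surgery) and returns the output tensor together with a copy
# of its size list. Assuming a hypothetical builder instance `net2`:
#
#   out, shape = net2.fcLayer(256, batch_norm=True)
#   # `shape` is a detached copy of the [N, 256]-style size list, safe to mutate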