def encoder_body(time, old_state, output_ta_t):
    """Compute one GRU-style recurrence step and advance the loop counter.

    Reads the input for step `time` from the enclosing `input_ta`, computes an
    update gate, a reset gate and a tanh candidate, and mixes the candidate
    with `old_state`. The unmasked new state is always written to
    `output_ta_t`; the state carried forward keeps `old_state` for rows that
    the length test marks as inactive.

    Relies on names from the enclosing scope: `input_ta`, `W_z`, `b_z`, `W_r`,
    `b_r`, `W_h`, `b_h`, `reverse`, `lengths`, `min_sequence_length` and
    `max_sequence_length`.

    Args:
        time: current step index.
        old_state: state carried in from the previous step.
        output_ta_t: array-like accumulator exposing `write(index, value)`.

    Returns:
        Tuple `(time + 1, state, output_ta_t)`.
    """
    step_input = input_ta.read(time)

    gate_input = tf.concat(1, [step_input, old_state])
    update_gate = tf.sigmoid(tf.matmul(gate_input, W_z) + b_z)
    reset_gate = tf.sigmoid(tf.matmul(gate_input, W_r) + b_r)

    candidate_input = tf.concat(1, [step_input, reset_gate * old_state])
    candidate = tf.tanh(tf.matmul(candidate_input, W_h) + b_h)

    new_state = (1 - update_gate) * candidate + update_gate * old_state
    output_ta_t = output_ta_t.write(time, new_state)

    def _take_new_state():
        # Every row is still active at this step: no masking needed.
        return new_state

    def _mask_inactive_rows():
        # Element-wise choice between the new and the previous state.
        if reverse:
            row_active = tf.greater_equal(time, max_sequence_length - lengths)
        else:
            row_active = tf.less(time, lengths)
        return tf.select(row_active, new_state, old_state)

    # `reverse` is a plain Python value, so only one predicate is ever built.
    if reverse:
        all_rows_active = tf.greater_equal(
            time, max_sequence_length - min_sequence_length)
    else:
        all_rows_active = tf.less(time, min_sequence_length)

    state = tf.cond(all_rows_active, _take_new_state, _mask_inactive_rows)
    return (time + 1, state, output_ta_t)
def lstm_cell(i, o, state):
    """Compute one LSTM step from separate per-gate weight matrices.

    Each gate pre-activation is `i @ W_x + o @ W_m + b`, using the module-level
    weights `ix/im/ib` (input gate), `fx/fm/fb` (forget gate), `cx/cm/cb`
    (candidate) and `ox/om/ob` (output gate).

    Args:
        i: input at the current step.
        o: output of the previous step.
        state: cell state of the previous step.

    Returns:
        Tuple `(output, new_state)`.
    """
    def _affine(w_in, w_out, bias):
        return tf.matmul(i, w_in) + tf.matmul(o, w_out) + bias

    input_gate = tf.sigmoid(_affine(ix, im, ib))
    forget_gate = tf.sigmoid(_affine(fx, fm, fb))
    candidate_pre = _affine(cx, cm, cb)

    state = forget_gate * state + input_gate * tf.tanh(candidate_pre)

    output_gate = tf.sigmoid(_affine(ox, om, ob))
    return output_gate * tf.tanh(state), state
def LSTMCell(cls, x, mprev, cprev, weights):
    """Compute one LSTM step using a single fused weight matrix.

    `x` and `mprev` are concatenated along axis 1 and multiplied by `weights`;
    the product is split into four equal parts along axis 1, read in order as
    candidate, input gate, forget gate and output gate pre-activations. The
    new cell state is clipped to [-50, 50].

    Args:
        cls: unused (the signature suggests a classmethod; not visible here).
        x: input at the current step.
        mprev: previous output.
        cprev: previous cell state.
        weights: fused weight matrix; no bias is applied.

    Returns:
        Tuple `(new_m, new_c)`.
    """
    fused_input = tf.concat(1, [x, mprev])
    pre_activations = tf.matmul(fused_input, weights)
    candidate_pre, input_pre, forget_pre, output_pre = tf.split(
        1, 4, pre_activations)

    kept = tf.sigmoid(forget_pre) * cprev
    written = tf.sigmoid(input_pre) * tf.tanh(candidate_pre)
    new_c = tf.clip_by_value(kept + written, -50.0, 50.0)

    new_m = tf.sigmoid(output_pre) * tf.tanh(new_c)
    return new_m, new_c
def __call__(self, inputs, state, scope=None):
    """Run one LSTM step, optionally projecting the output.

    `state` holds the cell state and the previous output side by side along
    axis 1: the first `self._num_units` columns are the cell state, the next
    `num_proj` columns the output (`num_proj` falls back to `self._num_units`
    when no projection is configured).

    Args:
        inputs: rank-2 tensor whose second dimension is statically known.
        state: rank-2 tensor laid out as `[c_prev | m_prev]`.
        scope: accepted for interface compatibility; not used here.

    Returns:
        Tuple `(m, new_state)` with `new_state` equal to `[c | m]` on axis 1.

    Raises:
        ValueError: if the static size of `inputs`' second dimension is
            unknown.
    """
    if self._num_proj is None:
        num_proj = self._num_units
    else:
        num_proj = self._num_proj

    c_prev = tf.slice(state, [0, 0], [-1, self._num_units])
    m_prev = tf.slice(state, [0, self._num_units], [-1, num_proj])

    input_size = inputs.get_shape().with_rank(2)[1]
    if input_size.value is None:
        raise ValueError("Could not infer input size from inputs.get_shape()[-1]")

    with tf.variable_scope(type(self).__name__, initializer=self._initializer):
        cell_inputs = tf.concat(1, [inputs, m_prev])
        lstm_matrix = tf.nn.bias_add(
            tf.matmul(cell_inputs, self._concat_w), self._b)
        # Split order: input gate, new input, forget gate, output gate.
        in_gate, new_input, forget_gate, out_gate = tf.split(1, 4, lstm_matrix)

        retained = tf.sigmoid(forget_gate + self._forget_bias) * c_prev
        # NOTE(review): the new input goes through sigmoid here, whereas the
        # textbook LSTM uses tanh. Kept as-is; confirm this is intentional.
        added = tf.sigmoid(in_gate) * tf.sigmoid(new_input)
        c = retained + added

        m = tf.sigmoid(out_gate) * tf.tanh(c)
        if self._num_proj is not None:
            m = tf.matmul(m, self._concat_w_proj)

    new_state = tf.concat(1, [c, m])
    return m, new_state
def unit(x, hidden_memory_tm1):
    """Compute one LSTM step on a packed (hidden, cell) state.

    Uses the weights `Wi/Ui/bi`, `Wf/Uf/bf`, `Wog/Uog/bog` and `Wc/Uc/bc`
    found on the enclosing `self`.

    Args:
        x: input at the current step.
        hidden_memory_tm1: previous hidden state and cell state packed along
            the first axis.

    Returns:
        The new hidden state and cell state packed the same way.
    """
    h_prev, c_prev = tf.unpack(hidden_memory_tm1)

    def _pre_activation(w, u, b):
        return tf.matmul(x, w) + tf.matmul(h_prev, u) + b

    input_gate = tf.sigmoid(_pre_activation(self.Wi, self.Ui, self.bi))
    forget_gate = tf.sigmoid(_pre_activation(self.Wf, self.Uf, self.bf))
    output_gate = tf.sigmoid(_pre_activation(self.Wog, self.Uog, self.bog))
    candidate = tf.nn.tanh(_pre_activation(self.Wc, self.Uc, self.bc))

    # Blend what is kept from the old memory with what is newly written.
    c = forget_gate * c_prev + input_gate * candidate
    current_hidden_state = output_gate * tf.nn.tanh(c)

    return tf.pack([current_hidden_state, c])
def train():
    """Train a two-layer sigmoid network on the four XOR samples.

    Builds placeholders, the forward pass through the module-level parameters
    `w1`, `b1`, `w2`, `b2`, a mean binary cross-entropy cost and a plain
    gradient-descent step (learning rate 0.01). It then runs 100000 updates in
    the module-level session `sess`, printing `(step, cost)` every 1000 steps.
    """
    # Four samples, two features each; one 0/1 target per sample.
    x = tf.placeholder(tf.float32, [4, 2], name='x-inputs')
    y = tf.placeholder(tf.float32, [4, 1], name='y-inputs')

    # Forward pass.
    hidden = tf.sigmoid(tf.matmul(x, w1) + b1)
    output = tf.sigmoid(tf.matmul(hidden, w2) + b2)

    # Mean cross-entropy over the training samples.
    cost = tf.reduce_mean(
        ((y * tf.log(output)) + ((1 - y) * tf.log(1.0 - output))) * -1)

    train_step = tf.train.GradientDescentOptimizer(
        learning_rate=0.01).minimize(cost)

    # XOR truth table, fed unchanged on every step.
    batch = {
        x: [[0, 1], [0, 0], [1, 0], [1, 1]],
        y: [[1], [0], [1], [0]],
    }

    sess.run(tf.initialize_all_variables())

    for step in range(100000):
        sess.run(train_step, feed_dict=batch)
        if step % 1000 == 0:
            # Printed as one tuple, matching the Python 2 statement form.
            print((step, sess.run(cost, feed_dict=batch)))

    print('\ntraining done\n')
def loss_fn(w_flat):
    """Return the reconstruction error of a tied-weight sigmoid autoencoder.

    `w_flat` is reshaped to `[visible_size, hidden_size]`; the module-level
    `data` is encoded with that matrix and decoded with its transpose, with a
    sigmoid after each product.

    Args:
        w_flat: flat weights holding `visible_size * hidden_size` values.

    Returns:
        Scalar mean squared difference between the reconstruction and `data`.
    """
    weights = tf.reshape(w_flat, [visible_size, hidden_size])
    encoded = tf.sigmoid(tf.matmul(data, weights))
    decoded = tf.sigmoid(tf.matmul(encoded, weights, transpose_b=True))
    return tf.reduce_mean(tf.square(decoded - data))
def __call__(self, inputs, state, scope=None):
    """Run one step of an LSTM variant with peepholes and a fixed input gate.

    `state` is split in half along axis 1 into the previous cell state and the
    previous output. The input gate is the constant 1; the forget and output
    gates use element-wise peephole weights `p_f` and `p_o`.

    Args:
        inputs: input at the current step; its dtype is used for all
            variables.
        state: `[c_prev | y_prev]` concatenated along axis 1.
        scope: optional variable scope name; defaults to the class name.

    Returns:
        Tuple `(y, new_state)` with `new_state` equal to `[c | y]` on axis 1.
    """
    with tf.variable_scope(scope or type(self).__name__):
        uniform_init = tf.random_uniform_initializer(-0.1, 0.1)

        def make_var(var_name, shape):
            return tf.get_variable(var_name, shape,
                                   initializer=uniform_init,
                                   dtype=inputs.dtype)

        c_prev, y_prev = tf.split(1, 2, state)

        blocks = self._num_blocks
        # Variables are created in the original order: input weights,
        # recurrent weights, biases, then peepholes.
        W_z, W_f, W_o = [make_var(n, [self.input_size, blocks])
                         for n in ("W_z", "W_f", "W_o")]
        R_z, R_f, R_o = [make_var(n, [blocks, blocks])
                         for n in ("R_z", "R_f", "R_o")]
        b_z, b_f, b_o = [make_var(n, [1, blocks])
                         for n in ("b_z", "b_f", "b_o")]
        p_f, p_o = [make_var(n, [blocks]) for n in ("p_f", "p_o")]

        block_input = tf.tanh(
            tf.matmul(inputs, W_z) + tf.matmul(y_prev, R_z) + b_z)

        input_gate = 1  # constant: the block input is always fully written
        forget_gate = tf.sigmoid(
            tf.matmul(inputs, W_f) + tf.matmul(y_prev, R_f)
            + tf.mul(c_prev, p_f) + b_f)

        c = tf.mul(input_gate, block_input) + tf.mul(forget_gate, c_prev)

        # The output gate peeks at the freshly updated cell state.
        output_gate = tf.sigmoid(
            tf.matmul(inputs, W_o) + tf.matmul(y_prev, R_o)
            + tf.mul(c, p_o) + b_o)

        y = tf.mul(tf.tanh(c), output_gate)

        return y, tf.concat(1, [c, y])
def mkDiscriminator(input, weights):
    """Build a three-layer discriminator: tanh -> tanh -> sigmoid.

    The original body continued after the `return` with a fourth sigmoid layer
    reading `weights['w4']` / `weights['b4']`. That code could never run and
    has been removed; the function still returns the third layer's output.

    Args:
        input: batch of samples to score.
        weights: mapping with keys `'w1'`, `'b1'`, `'w2'`, `'b2'`, `'w3'`
            and `'b3'`.

    Returns:
        Output of the final sigmoid layer.
    """
    l1 = tf.nn.tanh(tf.matmul(input, weights['w1']) + weights['b1'])
    l2 = tf.nn.tanh(tf.matmul(l1, weights['w2']) + weights['b2'])
    l3 = tf.sigmoid(tf.matmul(l2, weights['w3']) + weights['b3'])
    return l3
def __call__(self, inputs, state, scope=None):
    """Run one step of the JZS1 recurrent cell.

    Equations implemented (x = inputs, h = state):
        z  = sigmoid(W_xz x + b_z)
        r  = sigmoid(W_xr x + W_hr h + b_r)
        h' = tanh(W_hh (r * h) + tanh(x) + b_h) * z + h * (1 - z)

    Both gate biases start at 1.0. `tanh(inputs)` is added directly to the
    recurrent projection, so `inputs` must be shape-compatible with it.

    Args:
        inputs: input at the current step.
        state: previous hidden state.
        scope: optional variable scope name; defaults to the class name.

    Returns:
        Tuple `(h_t, h_t)`; the output and the new state are the same tensor.
    """
    init_options = dict(
        weight_initializer=self._weight_initializer,
        orthogonal_scale_factor=self._orthogonal_scale_factor)

    with tf.device("/gpu:" + str(self._gpu_for_layer)):
        with tf.variable_scope(scope or type(self).__name__):
            # Update gate: depends on the input only.
            with tf.variable_scope("Zinput"):
                z = tf.sigmoid(
                    linear([inputs], self._num_units, True, 1.0,
                           **init_options))

            # Reset gate: depends on the input and the previous state.
            with tf.variable_scope("Rinput"):
                r = tf.sigmoid(
                    linear([inputs, state], self._num_units, True, 1.0,
                           **init_options))

            # Candidate state and the two sides of the gated mix.
            with tf.variable_scope("Candidate"):
                recurrent_part = linear([r * state], self._num_units, True)
                candidate = tf.tanh(tf.tanh(inputs) + recurrent_part)
                from_candidate = candidate * z
                from_previous = state * (1 - z)

            h_t = from_candidate + from_previous

    return h_t, h_t
def __call__(self, inputs, state, scope=None):
    """Run one step of the JZS3 recurrent cell.

    Equations implemented (x = inputs, h = state):
        z  = sigmoid(W_xz x + W_hz tanh(h) + b_z)
        r  = sigmoid(W_xr x + W_hr h + b_r)
        h' = tanh(W_hh (r * h) + W_xh x + b_h) * z + h * (1 - z)

    Both gate biases start at 1.0.

    Args:
        inputs: input at the current step.
        state: previous hidden state.
        scope: optional variable scope name; defaults to the class name.

    Returns:
        Tuple `(h_t, h_t)`; the output and the new state are the same tensor.
    """
    init_options = dict(
        weight_initializer=self._weight_initializer,
        orthogonal_scale_factor=self._orthogonal_scale_factor)

    with tf.device("/gpu:" + str(self._gpu_for_layer)):
        with tf.variable_scope(scope or type(self).__name__):
            # Update gate: sees the input and a squashed previous state.
            with tf.variable_scope("Zinput"):
                z = tf.sigmoid(
                    linear([inputs, tf.tanh(state)], self._num_units, True,
                           1.0, **init_options))

            # Reset gate: sees the input and the raw previous state.
            with tf.variable_scope("Rinput"):
                r = tf.sigmoid(
                    linear([inputs, state], self._num_units, True, 1.0,
                           **init_options))

            # Candidate state and the two sides of the gated mix.
            with tf.variable_scope("Candidate"):
                candidate_pre = linear([state * r, inputs],
                                       self._num_units, True)
                from_candidate = tf.tanh(candidate_pre) * z
                from_previous = state * (1 - z)

            h_t = from_candidate + from_previous

    return h_t, h_t
def unroll(inp, state):
    """Compute one step of a GRU-style recurrence.

    The reset gate scales the recurrent projection `state @ w_hu` (after the
    matmul, not before). All weights and biases (`w_x*`, `w_h*`, `b_*`) come
    from the enclosing scope.

    Args:
        inp: input at the current step.
        state: previous hidden state.

    Returns:
        The new hidden state.
    """
    def _gate(w_x, w_h, bias):
        return tf.sigmoid(tf.matmul(inp, w_x) + tf.matmul(state, w_h) + bias)

    update_gate = _gate(w_xi, w_hi, b_i)
    reset_gate = _gate(w_xr, w_hr, b_r)

    candidate = tf.tanh(
        tf.matmul(inp, w_xu) + reset_gate * tf.matmul(state, w_hu) + b_u)

    return state * (1 - update_gate) + candidate * update_gate
def forward_propogation(self):
    """Build the forward pass of a two-layer sigmoid network.

    Creates a new float placeholder for the input and applies
    `sigmoid(x @ W1 + b1)` followed by `sigmoid(a2 @ W2 + b2)`.

    The activation ops were previously named "Hidden Activation" and
    "Output Activation". TensorFlow rejects names containing spaces when it
    creates the op's name scope, so the spaces are replaced by underscores.

    NOTE(review): the placeholder is neither returned nor stored on `self`,
    so callers cannot feed it by reference. Confirm how callers feed input.

    Returns:
        The output-layer activation tensor.
    """
    x = tf.placeholder("float")
    z2 = tf.add(tf.matmul(x, self.W1), self.b1)
    a2 = tf.sigmoid(z2, name="Hidden_Activation")
    z3 = tf.add(tf.matmul(a2, self.W2), self.b2)
    a3 = tf.sigmoid(z3, name="Output_Activation")
    return a3
def add_model(self, inputs1, inputs2, seq_len1, seq_len2):
    """Unroll one shared sigmoid RNN over two input sequences.

    Both sequences use the same `Whh`, `Wxh` and bias, and both start from the
    same all-zero state, which is also stored as `self.initial_state`. At each
    step the two hidden states are concatenated along axis 1.

    Changes from the previous version: leftover debug prints are removed (one
    printed the bound method `get_shape` rather than a shape), and the unused
    `rnn_outputs` list is dropped. The graph that is built is unchanged.

    Args:
        inputs1: indexable sequence of per-step inputs for the first
            sequence, with at least `config.max_steps` entries.
        inputs2: same, for the second sequence.
        seq_len1: unused.
        seq_len2: unused.

    Returns:
        List of `config.max_steps` tensors, each `[h1_t | h2_t]` on axis 1.
    """
    cfg = self.config
    self.initial_state = tf.constant(
        np.zeros((cfg.batch_size, cfg.hidden_size)), dtype=tf.float32)

    rnn_outputs1 = []
    rnn_outputs2 = []
    h_curr1 = self.initial_state
    h_curr2 = self.initial_state

    with tf.variable_scope('rnn'):
        Whh = tf.get_variable(
            'Whh', shape=(cfg.hidden_size, cfg.hidden_size), dtype=tf.float32)
        Wxh = tf.get_variable(
            'Wxh', shape=(cfg.embed_size, cfg.hidden_size), dtype=tf.float32)
        b1 = tf.get_variable(
            'bhx', shape=(cfg.hidden_size,), dtype=tf.float32)

        for i in range(cfg.max_steps):
            # Second sequence first, matching the original op order.
            h_curr2 = tf.sigmoid(
                tf.matmul(h_curr2, Whh) + tf.matmul(inputs2[i], Wxh) + b1)
            h_curr1 = tf.sigmoid(
                tf.matmul(h_curr1, Whh) + tf.matmul(inputs1[i], Wxh) + b1)
            rnn_outputs1.append(h_curr1)
            rnn_outputs2.append(h_curr2)

    rnn_states = [tf.concat(1, [out1, out2])
                  for out1, out2 in zip(rnn_outputs1, rnn_outputs2)]
    return rnn_states
def __call__(self, inputs, state, scope = None):
    """Run one GRU step.

    The gate projection previously read `states`, a name that is not defined
    in this function, so the call raised NameError. It now uses the `state`
    parameter, consistent with the candidate computation below.

    Args:
        inputs: input at the current step.
        state: previous hidden state.
        scope: optional variable scope name; defaults to the class name.

    Returns:
        Tuple `(new_state, new_state)`; output and state are the same tensor.
    """
    with tf.variable_scope(scope or type(self).__name__):
        with tf.variable_scope("Gates"):
            # One fused projection for both gates; bias starts at 1.0.
            gates = linear(
                [inputs, state],
                2 * self._num_units,
                bias = True,
                bias_start = 1.0
            )
            reset, update = tf.split(1, 2, gates)
            reset, update = tf.sigmoid(reset), tf.sigmoid(update)

        with tf.variable_scope("Candidate"):
            candidate = linear(
                [inputs, reset * state],
                self._num_units,
                bias = True
            )
            candidate = tf.tanh(candidate)

        # `update` is the share of the previous state that is kept.
        new_state = update * state + (1 - update) * candidate

    return new_state, new_state
def __call__(self, x, state, scope=None):
    """Run one LSTM step with separately initialised input/recurrent weights.

    `state` is split in half along axis 1 into `(c, h)`. `W_xh` uses the
    default initializer and `W_hh` uses `lstm_ortho_initializer(1.0)`; the two
    are stacked so one matmul yields all four gate pre-activations. Dropout is
    applied to the tanh candidate when `self.use_recurrent_dropout` is set.

    Args:
        x: input at the current step; its second dimension must be static.
        state: `[c | h]` concatenated along axis 1.
        scope: optional variable scope name; defaults to the class name.

    Returns:
        Tuple `(new_h, new_state)` with `new_state` equal to `[new_c | new_h]`.
    """
    with tf.variable_scope(scope or type(self).__name__):
        c, h = tf.split(state, 2, 1)

        x_size = x.get_shape().as_list()[1]
        gates_width = 4 * self.num_units

        # Separate variables so each can use its own initializer.
        w_xh = tf.get_variable(
            'W_xh', [x_size, gates_width], initializer=None)
        w_hh = tf.get_variable(
            'W_hh', [self.num_units, gates_width],
            initializer=lstm_ortho_initializer(1.0))
        bias = tf.get_variable(
            'bias', [gates_width],
            initializer=tf.constant_initializer(0.0))

        stacked_inputs = tf.concat([x, h], 1)
        stacked_weights = tf.concat([w_xh, w_hh], 0)
        hidden = tf.matmul(stacked_inputs, stacked_weights) + bias

        # Split order: input gate, new input, forget gate, output gate.
        i, j, f, o = tf.split(hidden, 4, 1)

        g = tf.tanh(j)
        if self.use_recurrent_dropout:
            g = tf.nn.dropout(g, self.dropout_keep_prob)

        new_c = c * tf.sigmoid(f + self.forget_bias) + tf.sigmoid(i) * g
        new_h = tf.tanh(new_c) * tf.sigmoid(o)

        # The state travels as one concatenated tensor, not a tuple.
        return new_h, tf.concat([new_c, new_h], 1)
def __call__(self, inputs, state, timestep = 0, scope=None):
    """Run one time step of a stacked highway-style recurrent cell.

    For each of `self.num_highway_layers` layers a tanh "highway factor" and a
    sigmoid gate are computed, and the state becomes
    `factor * gate + state * (1 - gate)`. Layer 0 always sees `inputs`; later
    layers see it only when `self.use_inputs_on_each_layer` is set. Layers
    that see `inputs` use `multiplicative_integration`, the others `linear`.
    The gate bias starts at -3.0.

    Args:
        inputs: input at the current step.
        state: previous hidden state.
        timestep: unused.
        scope: unused.

    Returns:
        Tuple `(current_state, current_state)` after the last layer.
    """
    current_state = state

    for layer_index in xrange(self.num_highway_layers):
        layer_tag = str(layer_index)
        sees_inputs = self.use_inputs_on_each_layer or layer_index == 0

        with tf.variable_scope('highway_factor_' + layer_tag):
            if sees_inputs:
                factor_pre = multiplicative_integration(
                    [inputs, current_state], self._num_units)
            else:
                factor_pre = linear([current_state], self._num_units, True)
            highway_factor = tf.tanh(factor_pre)

        with tf.variable_scope('gate_for_highway_factor_' + layer_tag):
            if sees_inputs:
                gate_pre = multiplicative_integration(
                    [inputs, current_state], self._num_units,
                    initial_bias_value = -3.0)
            else:
                gate_pre = linear(
                    [current_state], self._num_units, True, -3.0)
            gate_for_highway_factor = tf.sigmoid(gate_pre)
            gate_for_hidden_factor = 1 - gate_for_highway_factor

            # Recurrent dropout touches only the new factor, and only while
            # training.
            if self.use_recurrent_dropout and self.is_training:
                highway_factor = tf.nn.dropout(
                    highway_factor, self.recurrent_dropout_factor)

        current_state = (highway_factor * gate_for_highway_factor
                         + current_state * gate_for_hidden_factor)

    return current_state, current_state
def make_tf_top(x_shape, loss='sigmoid_ce'):
    """Build the top (loss) layer of the network.

    Creates placeholders for the layer input `x` (logits / predictions) and
    the target `y`, the chosen loss, its gradient with respect to `x`, and
    the matching summaries.

    Changes from the previous version:
      * An unsupported `loss` now raises a ValueError before any op is
        created, instead of failing later on the unbound loss tensor.
      * The cross-entropy ops are called with `logits=` / `labels=` keywords.
        TensorFlow releases from 1.0 on reject positional arguments for these
        functions, and this block already uses the `tf.summary` API.

    Args:
        x_shape: shape used for both placeholders.
        loss: one of `'sigmoid_ce'`, `'softmax_ce'`, `'sigmoid_l2'`, `'l2'`.

    Returns:
        Tuple `(L, dx, summary, accuracy)`: the loss, its gradient w.r.t. the
        input placeholder, the merged summary op, and the accuracy tensor
        (`None` for the two L2 losses).

    Raises:
        ValueError: if `loss` is not one of the supported names.
    """
    supported = ('sigmoid_ce', 'softmax_ce', 'sigmoid_l2', 'l2')
    if loss not in supported:
        raise ValueError(
            "Unknown loss %r; expected one of %s" % (loss, ', '.join(supported)))

    with tf.name_scope('top'):
        x = tf.placeholder(tf.float32, shape=x_shape, name='input')
        y = tf.placeholder(tf.float32, shape=x_shape, name='output')

        # Only the classification losses define an accuracy.
        correct_prediction = None
        if loss == 'sigmoid_ce':
            L = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(logits=x, labels=y))
            correct_prediction = tf.equal(
                tf.round(tf.sigmoid(x)), tf.round(y))
        elif loss == 'softmax_ce':
            L = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=x, labels=y))
            correct_prediction = tf.equal(
                tf.argmax(tf.nn.softmax(x), 1), tf.argmax(y, 1))
        elif loss == 'sigmoid_l2':
            L = tf.nn.l2_loss(tf.sigmoid(x) - y)
        else:  # 'l2'
            L = tf.nn.l2_loss(x - y)

        if correct_prediction is None:
            accuracy = None
            accuracy_summary = []
        else:
            accuracy = tf.reduce_mean(
                tf.cast(correct_prediction, tf.float32))
            accuracy_summary = [tf.summary.scalar('accuracy', accuracy)]

        loss_summary = tf.summary.scalar('log_loss', tf.log(L))
        dx = tf.gradients(L, x)[0]

        return (L, dx,
                tf.summary.merge([loss_summary] + accuracy_summary),
                accuracy)
def __call__(self, x, state, timestep=0, scope=None):
    """Run one LSTM step with layer normalisation instead of a bias.

    `state` is split in half along axis 1 into `(h, c)`, the reverse of the
    common `(c, h)` order; the returned state uses the same `(h, c)` layout.
    The fused gate pre-activations go through `layer_norm_all`, and the new
    cell state goes through `layer_norm` before the output tanh.

    Args:
        x: input at the current step; both of its dimensions are read from
            the static shape.
        state: `[h | c]` concatenated along axis 1.
        timestep: unused.
        scope: optional variable scope name; defaults to the class name.

    Returns:
        Tuple `(new_h, new_state)` with `new_state` equal to `[new_h | new_c]`.
    """
    with tf.variable_scope(scope or type(self).__name__):
        h, c = tf.split(state, 2, 1)

        h_size = self.num_units
        static_shape = x.get_shape().as_list()
        x_size = static_shape[1]
        batch_size = static_shape[0]
        gates_width = 4 * self.num_units

        w_xh = tf.get_variable(
            'W_xh', [x_size, gates_width], initializer=None)
        w_hh = tf.get_variable(
            'W_hh', [self.num_units, gates_width],
            initializer=lstm_ortho_initializer(1.0))

        # One matmul over the stacked input and weights; no bias term.
        stacked_inputs = tf.concat([x, h], 1)
        stacked_weights = tf.concat([w_xh, w_hh], 0)
        pre_activations = tf.matmul(stacked_inputs, stacked_weights)

        pre_activations = layer_norm_all(
            pre_activations, batch_size, 4, h_size, 'ln_all')

        # Split order: input gate, new input, forget gate, output gate.
        i, j, f, o = tf.split(pre_activations, 4, 1)

        g = tf.tanh(j)
        if self.use_recurrent_dropout:
            g = tf.nn.dropout(g, self.dropout_keep_prob)

        new_c = c * tf.sigmoid(f + self.forget_bias) + tf.sigmoid(i) * g
        new_h = tf.tanh(layer_norm(new_c, h_size, 'ln_c')) * tf.sigmoid(o)

    return new_h, tf.concat([new_h, new_c], 1)
def __call__(self, x_placeholder, h_prev, C_prev):
    """Embed the input ids and run one bias-free LSTM step.

    Reuses the existing variables `embedding` and `weight` from `self.scope`.
    Dropout is applied to the embedded input when `self.is_training` is set.
    The fused projection is split along axis 1, in order, into forget gate,
    input gate, cell update and output gate.

    Args:
        x_placeholder: ids to look up in the embedding matrix.
        h_prev: previous hidden state.
        C_prev: previous cell state.

    Returns:
        Tuple `(h, C)`: the new hidden state and cell state.
    """
    with tf.variable_scope(self.scope, reuse=True):
        embedding = tf.get_variable('embedding')
        W = tf.get_variable('weight')

        x_embedding = tf.nn.embedding_lookup(embedding, x_placeholder)
        if self.is_training:
            x_embedding = tf.nn.dropout(x_embedding, self.keep_prob)

        # A single matmul yields all four gate pre-activations.
        fused_input = tf.concat(1, [h_prev, x_embedding])
        forget_pre, input_pre, update_pre, output_pre = tf.split(
            1, 4, tf.matmul(fused_input, W))

        forget_gate = tf.sigmoid(forget_pre)
        input_gate = tf.sigmoid(input_pre)
        output_gate = tf.sigmoid(output_pre)
        cell_update = tf.tanh(update_pre)

        C = tf.mul(forget_gate, C_prev) + tf.mul(input_gate, cell_update)
        h = tf.mul(output_gate, tf.tanh(C))

        return h, C
def lstm_cell(x, h, c, name=None, reuse=False):
    """Return the hidden and cell state of an LSTM after one time step.

    Creates (or reuses) the variables `kernel/input`, `kernel/hidden` and
    `bias`; both kernels use an orthogonal initializer. A constant 1.0 is
    added to the forget-gate pre-activation.

    Args:
        x: input at the current step; its last dimension must be static.
        h: previous hidden state; its last dimension sets the cell width.
        c: previous cell state.
        name: optional variable scope name (default name is "lstm").
        reuse: passed through to `tf.variable_scope`.

    Returns:
        Tuple `(h, c)` with the updated hidden and cell state.
    """
    input_width = x.shape[-1].value
    cell_width = h.shape[-1].value
    gates_width = cell_width * 4

    with tf.variable_scope(name, default_name="lstm", values=[x, h, c],
                           reuse=reuse):
        input_kernel = tf.get_variable(
            "kernel/input", [input_width, gates_width], dtype=tf.float32,
            initializer=tf.orthogonal_initializer(1.0))
        hidden_kernel = tf.get_variable(
            "kernel/hidden", [cell_width, gates_width], dtype=tf.float32,
            initializer=tf.orthogonal_initializer(1.0))
        bias = tf.get_variable(
            "bias", [gates_width], dtype=tf.float32,
            initializer=tf.constant_initializer(0.0))

    pre_activations = (tf.matmul(x, input_kernel)
                       + tf.matmul(h, hidden_kernel) + bias)
    in_pre, forget_pre, out_pre, update_pre = tf.split(
        pre_activations, 4, axis=1)

    in_gate = tf.sigmoid(in_pre)
    forget_gate = tf.sigmoid(forget_pre + 1.0)
    out_gate = tf.sigmoid(out_pre)
    update = tf.tanh(update_pre)

    c = forget_gate * c + in_gate * update
    h = out_gate * tf.tanh(c)
    return h, c
def build_node(self, x_in, c_in, h_in, scope="lstm_cell"):
    """Build one LSTM node using batched matmuls and a folded-in bias.

    A column of ones is prepended along axis 2 to `[x_in | h_in]`, so each
    gate matrix (`self.w_f`, `self.w_i`, `self.w_c`, `self.w_o`) carries its
    own bias row. `batch_size` is read from module scope.

    Args:
        x_in: rank-3 input for this node (concatenated on axis 2).
        c_in: previous cell state.
        h_in: previous hidden state, rank-3 like `x_in`.
        scope: variable scope name for the node.

    Returns:
        Tuple `(c, h)`. Note the cell state comes first.
    """
    with tf.variable_scope(scope):
        features = tf.concat(2, [x_in, h_in])
        bias_column = tf.constant(
            np.ones([batch_size, 1, 1]), name="b", dtype=tf.float32)
        x_h_concat = tf.concat(2, [bias_column, features])

        def _project(gate_weights):
            return tf.batch_matmul(x_h_concat, gate_weights)

        forget_gate = tf.sigmoid(_project(self.w_f))
        input_gate = tf.sigmoid(_project(self.w_i))
        candidate_c = tf.tanh(_project(self.w_c))

        # New cell state: the retained part of the old state plus the gated
        # candidate.
        retained = tf.mul(forget_gate, c_in)
        written = tf.mul(input_gate, candidate_c)
        c = tf.add(retained, written)

        output_gate = tf.sigmoid(_project(self.w_o))
        h = tf.mul(output_gate, tf.tanh(c))

    return (c, h)
def __call__(self, inputs, state, scope=None):
    """Long short-term memory cell (LSTM).

    The variable scope was previously opened as
    `tf.variable_scope(self, scope or "basic_lstm_cell", ...)`, which passes
    the cell object as `name_or_scope`. `tf.variable_scope` accepts only a
    string or a `VariableScope` there and raises TypeError for anything else.
    The scope name is now passed as the first argument.

    Args:
        inputs: input at the current step.
        state: an `(c, h)` pair when `self._state_is_tuple` is set, otherwise
            a single tensor `[c | h]` concatenated along axis 1.
        scope: optional variable scope name; defaults to "basic_lstm_cell".

    Returns:
        Tuple `(h, new_state)`. `new_state` is an `LSTMStateTuple(c, h)` or
        the concatenation `[c | h]`, mirroring the form of the input state.
    """
    with tf.variable_scope(scope or "basic_lstm_cell", reuse=self._reuse):
        if self._state_is_tuple:
            c_prev, h_prev = state
        else:
            c_prev, h_prev = tf.split(
                value=state, num_or_size_splits=2, axis=1)

        # Parameters of gates are concatenated into one multiply for
        # efficiency.
        concat = tf.contrib.rnn._linear(
            [inputs, h_prev], 4 * self._num_units, True)

        # i = input_gate, g = new_input, f = forget_gate, o = output_gate
        i, g, f, o = tf.split(value=concat, num_or_size_splits=4, axis=1)

        c = (c_prev * tf.sigmoid(f + self._forget_bias)
             + tf.sigmoid(i) * tf.tanh(g))
        h = tf.tanh(c) * tf.sigmoid(o)

        if self._state_is_tuple:
            new_state = LSTMStateTuple(c, h)
        else:
            new_state = tf.concat([c, h], 1)
        return h, new_state
def build_losses(self, logits_real, logits_fake):
    """Build the discriminator and generator losses of a standard GAN.

    The discriminator loss is the average of two sigmoid cross-entropies:
    real logits against all-ones labels and fake logits against all-zeros
    labels. The generator loss is the cross-entropy of the fake logits
    against all-ones labels. Results are stored in `self.d_loss` and
    `self.g_loss`; accuracies and score histograms are added as summaries.

    Args:
        logits_real (tf.Tensor): discriminator logits for real samples.
        logits_fake (tf.Tensor): discriminator logits for generated samples.
    """
    def _mean_cross_entropy(logits, labels, name):
        return tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=logits, labels=labels),
            name=name)

    def _fraction_true(condition, name):
        return tf.reduce_mean(tf.cast(condition, tf.float32), name=name)

    with tf.name_scope("GAN_loss"):
        score_real = tf.sigmoid(logits_real)
        score_fake = tf.sigmoid(logits_fake)
        tf.summary.histogram('score-real', score_real)
        tf.summary.histogram('score-fake', score_fake)

        with tf.name_scope("discrim"):
            d_loss_pos = _mean_cross_entropy(
                logits_real, tf.ones_like(logits_real), 'loss_real')
            d_loss_neg = _mean_cross_entropy(
                logits_fake, tf.zeros_like(logits_fake), 'loss_fake')

            d_pos_acc = _fraction_true(score_real > 0.5, 'accuracy_real')
            d_neg_acc = _fraction_true(score_fake < 0.5, 'accuracy_fake')

            d_accuracy = tf.add(.5 * d_pos_acc, .5 * d_neg_acc,
                                name='accuracy')
            self.d_loss = tf.add(.5 * d_loss_pos, .5 * d_loss_neg,
                                 name='loss')

        with tf.name_scope("gen"):
            # The generator is rewarded when fakes are scored as real.
            self.g_loss = _mean_cross_entropy(
                logits_fake, tf.ones_like(logits_fake), 'loss')
            g_accuracy = _fraction_true(score_fake > 0.5, 'accuracy')

        add_moving_summary(self.g_loss, self.d_loss, d_accuracy, g_accuracy)
def __call__(self, inputs, state, scope=None):
    """Run one step of the JZS2 recurrent cell.

    Equations implemented (x = inputs, h = state):
        z  = sigmoid(W_xz x + W_hz h + b_z)
        r  = sigmoid(x + W_hr h + b_r)
        h' = tanh(W_hh (r * h) + W_xh x + b_h) * z + h * (1 - z)

    Both gate biases start at 1.0. `inputs` is added directly to the reset
    gate projection, so it must be shape-compatible with it.

    Args:
        inputs: input at the current step.
        state: previous hidden state.
        scope: optional variable scope name; defaults to the class name.

    Returns:
        Tuple `(h_t, h_t)`; the output and the new state are the same tensor.
    """
    units = self._num_units

    with tf.device("/gpu:" + str(self._gpu_for_layer)):
        with tf.variable_scope(scope or type(self).__name__):
            # Update gate: projection of the input and the previous state.
            with tf.variable_scope("Zinput"):
                z = tf.sigmoid(
                    linear.linear([inputs, state], units, True, 1.0))

            # Reset gate: raw input plus a projection of the previous state.
            with tf.variable_scope("Rinput"):
                state_projection = linear.linear([state], units, True, 1.0)
                r = tf.sigmoid(inputs + state_projection)

            # Candidate state and the two sides of the gated mix.
            with tf.variable_scope("Candidate"):
                candidate_pre = linear.linear(
                    [state * r, inputs], units, True)
                from_candidate = tf.tanh(candidate_pre) * z
                from_previous = state * (1 - z)

            h_t = from_candidate + from_previous

    return h_t, h_t
def lstm_cell(i, o, state):
    """Compute one LSTM step with all four gates in one batched matmul.

    The input and the previous output are each stacked four times and
    multiplied by the module-level stacks `fico_x` and `fico_m`; `fico_b` is
    then added. Slices 0..3 of the result are used, in order, as forget gate,
    input gate, candidate update and output gate. There are no connections
    from the previous cell state to the gates.

    See e.g. http://arxiv.org/pdf/1402.1128v1.pdf

    Args:
        i: input at the current step.
        o: output of the previous step.
        state: previous cell state.

    Returns:
        Tuple `(h, state)`: the new output and the new cell state.
    """
    stacked_inputs = tf.pack([i] * 4)
    stacked_outputs = tf.pack([o] * 4)

    from_inputs = tf.batch_matmul(stacked_inputs, fico_x)
    from_outputs = tf.batch_matmul(stacked_outputs, fico_m)
    pre_activations = from_inputs + from_outputs + fico_b

    forget_gate = tf.sigmoid(pre_activations[0, :, :])
    input_gate = tf.sigmoid(pre_activations[1, :, :])
    update = tf.tanh(pre_activations[2, :, :])

    state = forget_gate * state + input_gate * update

    output_gate = tf.sigmoid(pre_activations[3, :, :])
    h = output_gate * tf.tanh(state)
    return h, state
def __call__(self, inputs, state, scope=None):
    """Long short-term memory cell (LSTM) step with per-child forget gates.

    `state` is split in half along axis 1 into `(c, h)`. One forget-gate
    pre-activation is collected for each entry of `child_states`; when none
    are collected, the cell's own forget gate is used instead.

    NOTE(review): this looks like unfinished work (see the TODO below) and is
    unlikely to run as written; the specific problems are flagged inline.

    Args:
        inputs: input at the current step.
        state: `[c | h]` concatenated along axis 1.
        scope: optional variable scope name; defaults to the class name.

    Returns:
        Tuple `(new_h, new_state)` with `new_state` equal to `[new_c | new_h]`.
    """
    with tf.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
        # Parameters of gates are concatenated into one multiply for efficiency.
        c, h = tf.split(1, 2, state)
        concat = linear.linear([inputs, h], 4 * self._num_units, True)

        fs = []

        # This can be made more efficient since we're doing more than needs to be
        # done, but for now w/e
        # NOTE(review): `child_states` is neither a parameter nor a local, so
        # this raises NameError unless a module-level `child_states` exists.
        # NOTE(review): `concat` is rebound on every iteration, so with
        # children present the i/j/f/o split below comes from the LAST child's
        # projection, not from this cell's own `h`. `c_k` is never used.
        # NOTE(review): `linear.linear` is called repeatedly inside one
        # variable scope; confirm whether it reuses or re-creates variables.
        for child_state in child_states:
            c_k, h_k = tf.split(1, 2, child_state)
            concat = linear.linear([inputs, h_k], 4 * self._num_units, True)
            i_k, j_k, f_k, o_k = tf.split(1, 4, concat)
            fs.append(f_k)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        # TODO: forget gate for each child, probably need to split by number
        # of child states or something
        i, j, f, o = tf.split(1, 4, concat)

        # If no children just treat it like a regular lstm
        if not fs:
            fs.append(f)

        # NOTE(review): `fs` is a Python list, so `fs + self._forget_bias`
        # raises TypeError if the bias is a number. The outer `sum(...)` would
        # then iterate over a tensor, not over the children.
        new_c = sum(c * tf.sigmoid(fs + self._forget_bias)) + tf.sigmoid(i) * tf.tanh(j)
        new_h = tf.tanh(new_c) * tf.sigmoid(o)

        return new_h, tf.concat(1, [new_c, new_h])
def __call__(self, inputs, state, scope=None):
    """Run one GRU step with fixed multiplicative masks on input and state.

    `inputs` and `state` are multiplied element-wise by `self._dropMaskInput`
    and `self._dropMaskState` before any projection. The masked state is also
    what the update gate mixes with the candidate. Mask shapes must match the
    tensors on every dimension except the first; on a mismatch the shapes are
    printed and an AssertionError is raised.

    Args:
        inputs: input at the current step.
        state: previous hidden state.
        scope: optional variable scope name; defaults to the class name.

    Returns:
        Tuple `(new_h, new_h)`; the output and the new state are the same
        tensor.
    """
    def _check_trailing_shape(mask, tensor):
        mask_shape = mask.get_shape()[1:]
        tensor_shape = tensor.get_shape()[1:]
        if mask_shape != tensor_shape:
            print("error: " + str(mask_shape) + " != " + str(tensor_shape))
            assert False

    with vs.variable_scope(scope or type(self).__name__):
        _check_trailing_shape(self._dropMaskInput, inputs)
        _check_trailing_shape(self._dropMaskState, state)

        masked_inputs = tf.mul(self._dropMaskInput, inputs)
        masked_state = tf.mul(self._dropMaskState, state)

        with vs.variable_scope("Gates"):
            # Reset and update gates share one projection; the bias starts at
            # 1.0 so the cell initially neither resets nor updates.
            gates = rnn_cell._linear(
                [masked_inputs, masked_state], 2 * self._num_units, True, 1.0)
            reset, update = tf.split(1, 2, gates)
            reset, update = tf.sigmoid(reset), tf.sigmoid(update)

        with vs.variable_scope("Candidate"):
            candidate = self._activation(
                rnn_cell._linear([masked_inputs, reset * masked_state],
                                 self._num_units, True))

        new_h = update * masked_state + (1 - update) * candidate

    return new_h, new_h
def _compute_loss(self, prediction_tensor, target_tensor, weights):
    """Compute a bootstrapped sigmoid cross-entropy loss.

    The training target is a blend of the supplied target and the model's own
    prediction: `alpha * target + (1 - alpha) * estimate`. For the 'soft'
    bootstrap type the estimate is the sigmoid probability; otherwise it is
    that probability thresholded at 0.5.

    Args:
        prediction_tensor: A float tensor of shape [batch_size, num_anchors,
            num_classes] representing the predicted logits for each class.
        target_tensor: A float tensor of shape [batch_size, num_anchors,
            num_classes] representing one-hot encoded classification targets.
        weights: A float tensor of shape [batch_size, num_anchors,
            num_classes] or [batch_size, num_anchors, 1]. With the latter
            shape all classes are weighted equally.

    Returns:
        loss: a float tensor of shape [batch_size, num_anchors, num_classes]
            holding the weighted per-entry loss.
    """
    probabilities = tf.sigmoid(prediction_tensor)

    if self._bootstrap_type == 'soft':
        self_estimate = probabilities
    else:
        self_estimate = tf.cast(probabilities > 0.5, tf.float32)

    bootstrap_target_tensor = (self._alpha * target_tensor
                               + (1.0 - self._alpha) * self_estimate)

    per_entry_cross_ent = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=bootstrap_target_tensor, logits=prediction_tensor)
    return per_entry_cross_ent * weights
def call(self, x, h):
    """Run one step of a convolutional GRU.

    Reset and update gates come from one convolution over `[x | h]` joined on
    the feature axis; the candidate comes from a second convolution over
    `[x | r * h]`. When `self._normalize` is set, layer normalisation replaces
    the bias (gate bias starts at ones, candidate bias at zeros).

    Args:
        x: input feature map; its feature-axis size must be static.
        h: previous hidden state.

    Returns:
        Tuple `(h, h)`; the output and the new state are the same tensor.
    """
    axis = self._feature_axis
    channels = x.shape[axis].value
    in_channels = channels + self._filters

    def _convolve(tensors, out_channels):
        joined = tf.concat(tensors, axis=axis)
        kernel = tf.get_variable(
            'kernel', self._kernel + [in_channels, out_channels])
        return tf.nn.convolution(
            joined, kernel, 'SAME', data_format=self._data_format)

    with tf.variable_scope('gates'):
        gate_channels = 2 * self._filters if self._filters > 1 else 2
        y = _convolve([x, h], gate_channels)
        if self._normalize:
            # Each gate is normalised separately.
            r, u = tf.split(y, 2, axis=axis)
            r = tf.contrib.layers.layer_norm(r)
            u = tf.contrib.layers.layer_norm(u)
        else:
            y += tf.get_variable(
                'bias', [gate_channels], initializer=tf.ones_initializer())
            r, u = tf.split(y, 2, axis=axis)
        r, u = tf.sigmoid(r), tf.sigmoid(u)

    with tf.variable_scope('candidate'):
        y = _convolve([x, r * h], self._filters)
        if self._normalize:
            y = tf.contrib.layers.layer_norm(y)
        else:
            y += tf.get_variable(
                'bias', [self._filters], initializer=tf.zeros_initializer())
        # `u` is the share of the previous state that is kept.
        h = u * h + (1 - u) * self._activation(y)

    return h, h
def __init__(self, itemNum, userNum, emb_dim, lamda, param=None, initdelta=0.05, learning_rate=0.05):
    """Build the discriminator graph: embeddings, a one-hidden-layer MLP scorer, loss and update op.

    The commented-out lines show an earlier dot-product scorer
    (`u_embedding . i_embedding + i_bias`); the live code replaces it with an
    MLP over the concatenated user/item embeddings.

    NOTE(review): the replacement looks half-finished; the problems that are
    visible in this block are flagged inline below.

    Args:
        itemNum: number of items (rows of the item embedding table).
        userNum: number of users (rows of the user embedding table).
        emb_dim: embedding width.
        lamda: weight of the L2 regularisation term.
        param: optional `[user_embeddings, item_embeddings, item_bias]` initial
            values; random/zero initialisation is used when it is None.
        initdelta: half-width of the uniform initialisation range.
        learning_rate: step size for gradient descent.
    """
    self.itemNum = itemNum
    self.userNum = userNum
    self.hidden_num_units = 30
    self.emb_dim = emb_dim
    self.lamda = lamda  # regularization parameters
    self.param = param
    self.initdelta = initdelta
    self.learning_rate = learning_rate
    self.d_params = []

    with tf.variable_scope('discriminator'):
        if self.param == None:
            self.user_embeddings = tf.Variable(
                tf.random_uniform([self.userNum, self.emb_dim], minval=-self.initdelta, maxval=self.initdelta,
                                  dtype=tf.float32))
            self.item_embeddings = tf.Variable(
                tf.random_uniform([self.itemNum, self.emb_dim], minval=-self.initdelta, maxval=self.initdelta,
                                  dtype=tf.float32))
            self.item_bias = tf.Variable(tf.zeros([self.itemNum]))
        else:
            self.user_embeddings = tf.Variable(self.param[0])
            self.item_embeddings = tf.Variable(self.param[1])
            self.item_bias = tf.Variable(self.param[2])

    # NOTE(review): only these three variables are passed to `minimize` below,
    # so the MLP `weights` / `biases` get no updates from `d_updates`.
    self.d_params = [self.user_embeddings, self.item_embeddings, self.item_bias]

    # placeholder definition
    self.u = tf.placeholder(tf.int32)
    self.i = tf.placeholder(tf.int32)
    self.label = tf.placeholder(tf.float32)

    self.u_embedding = tf.nn.embedding_lookup(self.user_embeddings, self.u)
    self.i_embedding = tf.nn.embedding_lookup(self.item_embeddings, self.i)
    #self.i_bias = tf.gather(self.item_bias, self.i)
    self.input_embedding = tf.concat([self.u_embedding, self.i_embedding],1)

    # NOTE(review): `seed` is not defined in this method; it must exist at
    # module scope or these lines raise NameError.
    weights = {
        'hidden': tf.Variable(tf.random_normal([2*self.emb_dim, self.hidden_num_units], seed=seed)),
        'output': tf.Variable(tf.random_normal([self.hidden_num_units, 1], seed=seed))
    }
    biases = {
        'hidden': tf.Variable(tf.random_normal([self.hidden_num_units], seed=seed)),
        'output': tf.Variable(tf.random_normal([1], seed=seed))
    }

    # MLP scorer over the concatenated (user, item) embedding.
    hidden_layer = tf.add(tf.matmul(self.input_embedding, weights['hidden']), biases['hidden'])
    hidden_layer = tf.nn.relu(hidden_layer)
    self.pre_logits = tf.matmul(hidden_layer, weights['output']) + biases['output']
    # self.pre_logits = tf.reduce_sum(tf.multiply(self.u_embedding, self.i_embedding), 1) + self.i_bias

    # NOTE(review): `self.i_bias` is read here but its only assignment in this
    # method is commented out above; unless it is set elsewhere this raises
    # AttributeError.
    self.pre_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=self.label, logits=self.pre_logits) + self.lamda * (
        tf.nn.l2_loss(self.u_embedding) + tf.nn.l2_loss(self.i_embedding) + tf.nn.l2_loss(self.i_bias)
    )

    d_opt = tf.train.GradientDescentOptimizer(self.learning_rate)
    self.d_updates = d_opt.minimize(self.pre_loss, var_list=self.d_params)

    # self.reward_logits = tf.reduce_sum(tf.multiply(self.u_embedding, self.i_embedding),
    #                                    1) + self.i_bias
    self.reward_logits = tf.matmul(hidden_layer, weights['output']) + biases['output']
    # Maps the sigmoid output from (0, 1) to (-1, 1).
    self.reward = 2 * (tf.sigmoid(self.reward_logits) - 0.5)

    # for test stage, self.u: [batch_size]
    #.......................................Modifications 2
    # self.all_rating = tf.matmul(self.u_embedding, self.item_embeddings, transpose_a=False,
    #                             transpose_b=True) + self.item_bias
    # NOTE(review): this iterates over tensors in Python and calls
    # `tf.concat(x, y)`. With the `tf.concat(values, axis)` form used above,
    # `y` would be read as the axis, and graph-mode tensors may not be
    # iterable. The result is also a nested Python list fed to `tf.matmul`.
    # The intended shape of the (user, item) pair matrix needs confirming.
    self.all_pairs = [[ tf.concat(x,y) for x in self.u_embedding ] for y in self.item_embeddings]
    hidden_layer = tf.add(tf.matmul(self.all_pairs,weights['hidden']),biases['hidden'])
    hidden_layer = tf.nn.relu(hidden_layer)
    self.all_rating = tf.matmul(hidden_layer,weights['output']) + biases['output']

    #........................................Modifications
    # self.all_pairs = [[ tf.concat(x,y) for x in self.u_embedding ] for y in self.item_embeddings]
    hidden_layer = tf.add(tf.matmul(self.all_pairs, weights['hidden']), biases['hidden'])
    hidden_layer = tf.nn.relu(hidden_layer)
    self.all_logits = tf.matmul(hidden_layer, weights['output']) + biases['output']
    # self.all_logits = tf.reduce_sum(tf.multiply(self.u_embedding, self.item_embeddings), 1) + self.item_bias
    # Negative mean log of the softmax (taken over all flattened logits)
    # gathered at the indices in `self.i`.
    self.NLL = -tf.reduce_mean(tf.log(
        tf.gather(tf.reshape(tf.nn.softmax(tf.reshape(self.all_logits, [1, -1])), [-1]), self.i))
    )

    # for dns sample
    # self.all_pairs = [[ tf.concat(x,y) for x in self.u_embedding ] for y in self.item_embeddings]
    hidden_layer = tf.add(tf.matmul(self.all_pairs, weights['hidden']), biases['hidden'])
    hidden_layer = tf.nn.relu(hidden_layer)
    self.dns_rating = tf.matmul(hidden_layer, weights['output']) + biases['output']
import tensorflow as tf
import numpy as np

# Single-unit logistic regression on the XOR truth table.
# Set the graph-level random seed before any variable is created.
tf.set_random_seed(777)

# Four samples with two binary features; the label is the XOR of the features.
x_data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
y_data = np.array([[0], [1], [1], [0]], dtype=np.float32)

# The batch dimension is left open (None) so any number of rows can be fed.
X = tf.placeholder(tf.float32, shape=[None, 2])
Y = tf.placeholder(tf.float32, shape=[None, 1])

W = tf.Variable(tf.random_normal([2, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis using sigmoid, i.e. 1 / (1 + exp(-(X W + b)))
# NOTE(review): this is one linear unit followed by a sigmoid, which cannot
# separate XOR; presumably that limitation is the point of the example.
hypothesis = tf.sigmoid(tf.matmul(X, W) + b)

# cost/loss function: in logistic regression the cost carries a leading minus
# sign (it is the negative mean log-likelihood).
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) * tf.log(1 - hypothesis))

train = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

# Accuracy computation
# True if hypothesis > 0.5 else False
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

# Only variable initialisation runs in the visible part of the script; no
# training step is executed here.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
def model_fn(features, labels, mode, params):
    """Build the EstimatorSpec for the DKT model in predict, eval or train mode.

    Args:
        features: dict of tensors. 'inputs' and 'seq_len' are always read;
            'target_id' and 'target_correct' are read outside predict mode.
        labels: not used by this function (targets come from `features`).
        mode: one of tf.estimator.ModeKeys.
        params: hyper-parameters forwarded to DKT and
            get_train_op_and_metrics.

    Returns:
        A tf.estimator.EstimatorSpec matching `mode`.
    """
    inputs = features['inputs']
    seq_steps = features['seq_len']
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    try:
        batch_size, length = get_shape_list(inputs, expected_rank=2)
    except ValueError:
        # Rank-2 lookup failed: treat the input as one un-batched sequence
        # and give it a leading batch dimension of 1.
        batch_size = 1
        length = get_shape_list(inputs, expected_rank=1)[0]
        inputs = tf.reshape(inputs, [batch_size, length])

    with tf.variable_scope('model'):
        # Build model
        model = DKT(params, is_training)
        logits = model(batch_size, inputs, seq_steps)  # [batch, length, vocab_size]

    # In prediction mode the label/target is None; the model output is the
    # prediction itself.
    if mode == tf.estimator.ModeKeys.PREDICT:
        export_outputs = {
            'predict_output': tf.estimator.export.PredictOutput(
                {"predict": tf.sigmoid(logits)})
        }
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions={'predict': tf.sigmoid(logits)},
            export_outputs=export_outputs)

    # Calculate model loss (shared by eval and train).
    target_ids = features['target_id']
    target_correct = features['target_correct']
    loss = dkt_loss(logits, target_correct, target_ids, seq_steps)
    record_dict = {'minibatch_loss': loss}

    # A named copy of the loss so the logging hook can find it.
    tf.identity(loss, 'cross_entropy')

    if mode == tf.estimator.ModeKeys.EVAL:
        metric_dict = get_eval_metrics(logits, target_correct, target_ids,
                                       seq_steps)
        record_dict['accuracy'] = metric_dict['accuracy']
        record_scalars(record_dict)
        return tf.estimator.EstimatorSpec(
            mode=tf.estimator.ModeKeys.EVAL,
            loss=loss,
            predictions={'predict': tf.sigmoid(logits)},
            eval_metric_ops=metric_dict)

    # Train mode.
    # Placeholder for checkpoint restoration: the name map is empty here, so
    # no variable is ever tagged as initialised from a checkpoint.
    trainable_vars = tf.trainable_variables()
    initialized_variable_names = {}
    tf.logging.info("**** Trainable Variables ****")
    for var in trainable_vars:
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        else:
            init_string = ""
        tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    train_op, metric_dict = get_train_op_and_metrics(loss, params)
    acc_metric = get_eval_metrics(logits, target_correct, target_ids,
                                  seq_steps)
    record_dict['accuracy'] = acc_metric['accuracy']
    record_dict['learning_rate'] = metric_dict['learning_rate']
    record_scalars(record_dict)
    return tf.estimator.EstimatorSpec(
        mode=tf.estimator.ModeKeys.TRAIN,
        loss=loss,
        train_op=train_op)
def feedback_block1(self, inputs, state=None, var_list=None):
    """Run one step of the 'feedback_block1' convolutional LSTM-style cell.

    The gates are built from convolutions over `inputs` and the previous
    hidden state, plus element-wise products between the cell state and the
    W_c* variables.  All variables are fetched with reuse=True, so they must
    already exist under 'vfeedbacknet_<model_name>/feedback_block1'.

    Args:
        inputs: tensor fed to tf.nn.conv2d (assumes a 4-D NHWC tensor --
            TODO confirm against the caller).
        state: optional dict with keys 'hidden_state' and 'cell_state'.
            When None, every recurrent term is replaced by a scalar 0.
        var_list: optional list; the block's variables are appended to it
            in place if not already present.

    Returns:
        dict with the new 'hidden_state' and 'cell_state' tensors.

    Raises:
        AssertionError: if exactly one of the two state entries is None.
    """
    hidden_state = None
    cell_state = None
    if state is not None:
        hidden_state = state['hidden_state']
        cell_state = state['cell_state']

    # Both recurrent tensors must be present together or absent together.
    assert (cell_state is None) == (hidden_state is None), 'cell_state and hidden_state must BOTH be supplied as arguments.'

    with tf.variable_scope('vfeedbacknet_{}'.format(Model.model_name), reuse=True):
        with tf.variable_scope('feedback_block1'):
            # Input-to-gate convolution kernels.
            W_xf = tf.get_variable('W_xf')
            W_xi = tf.get_variable('W_xi')
            W_xc = tf.get_variable('W_xc')
            W_xo = tf.get_variable('W_xo')
            # Hidden-to-gate convolution kernels.
            W_hf = tf.get_variable('W_hf')
            W_hi = tf.get_variable('W_hi')
            W_hc = tf.get_variable('W_hc')
            W_ho = tf.get_variable('W_ho')
            # Element-wise weights applied to the cell state.
            W_cf = tf.get_variable('W_cf')
            W_ci = tf.get_variable('W_ci')
            W_co = tf.get_variable('W_co')
            # Gate biases.
            b_f = tf.get_variable('b_f')
            b_i = tf.get_variable('b_i')
            b_c = tf.get_variable('b_c')
            b_o = tf.get_variable('b_o')

            # Input gate.
            i_t = tf.sigmoid(
                tf.nn.bias_add(
                    tf.nn.conv2d(inputs, W_xi, [1, 1, 1, 1], padding='SAME') +
                    (tf.nn.conv2d(hidden_state, W_hi, [1, 1, 1, 1], padding='SAME') if hidden_state is not None else tf.to_float(0)) +
                    (tf.multiply(cell_state, W_ci, name='element_wise_multipy') if cell_state is not None else tf.to_float(0)),
                    b_i)
            )

            # Forget gate.
            f_t = tf.sigmoid(
                tf.nn.bias_add(
                    tf.nn.conv2d(inputs, W_xf, [1, 1, 1, 1], padding='SAME') +
                    (tf.nn.conv2d(hidden_state, W_hf, [1, 1, 1, 1], padding='SAME') if hidden_state is not None else tf.to_float(0)) +
                    (tf.multiply(cell_state, W_cf, name='element_wise_multipy_ft') if cell_state is not None else tf.to_float(0)),
                    b_f)
            )

            # New cell state: forget-gated old state plus input-gated
            # tanh candidate.
            new_cell_state = (tf.multiply(f_t, cell_state, name='element_wise_multipy_ct1') if cell_state is not None else tf.to_float(0)) + \
                tf.multiply(i_t,
                            tf.tanh(
                                tf.nn.bias_add(
                                    tf.nn.conv2d(inputs, W_xc, [1, 1, 1, 1], padding='SAME') +
                                    (tf.nn.conv2d(hidden_state, W_hc, [1, 1, 1, 1], padding='SAME') if hidden_state is not None else tf.to_float(0)),
                                    b_c)
                            ),
                            name='element_wise_multipy_ct2'
                            )

            # Output gate; its cell-state term uses the *new* cell state.
            o_t = tf.sigmoid(
                tf.nn.bias_add(
                    tf.nn.conv2d(inputs, W_xo, [1, 1, 1, 1], padding='SAME') +
                    (tf.nn.conv2d(hidden_state, W_ho, [1, 1, 1, 1], padding='SAME') if hidden_state is not None else tf.to_float(0)) +
                    tf.multiply(new_cell_state, W_co, name='element_wise_multipy_ot'),
                    b_o)
            )

            new_hidden_state = tf.multiply(o_t, tf.tanh(new_cell_state), name='element_wise_multipy_it')

    # Register this block's variables with the caller's list, skipping any
    # that are already in it.
    if var_list is not None:
        for var in [W_xf, W_xi, W_xc, W_xo, W_hf, W_hi, W_hc, W_ho, W_cf, W_ci, W_co, b_f, b_i, b_c, b_o]:
            if var not in var_list:
                var_list.append(var)

    return { 'hidden_state' : new_hidden_state, 'cell_state' : new_cell_state }
def train(inputs, epochs, batch_size=BATCH_SIZE):
    """Build the siamese three-layer GRU duplicate classifier and train it.

    Args:
        inputs: dict with 'embed' (pickle file name under EMBED_PATH mapping
            word index -> embedding vector) and 'train_file' (CSV path).
        epochs: number of epochs to run; the loop ends once it reaches 0.
        batch_size: number of rows requested from the iterator per step.

    Side effects:
        Prints progress, writes summaries to LOGDIR and saves checkpoints
        under CHECKPOINT_PATH (every 100 steps and once at the end).
    """
    global_step = tf.Variable(0, dtype=tf.int32, trainable=False,
                              name='global_step')

    # All prints use the single-argument parenthesised form, which produces
    # the same text under Python 2 and Python 3.
    print('Loading embeddings...')
    # NOTE(review): pickle.load can execute arbitrary code; only point this
    # at trusted embedding files.
    with open(EMBED_PATH + inputs['embed'], 'rb') as embed_file:
        map_index_vec = pickle.load(embed_file)
    print('Done.')

    n_symbols = len(map_index_vec)
    # NOTE(review): the embedding width is fixed at 50 here -- confirm it
    # matches the vectors stored in the pickle.
    ew = np.zeros((n_symbols, 50), dtype=np.float32)
    for index, vec in map_index_vec.items():
        ew[index, :] = vec

    with tf.name_scope("data"):
        in1 = tf.placeholder(tf.int32, shape=[None, MAX_STEP], name='in1')
        in2 = tf.placeholder(tf.int32, shape=[None, MAX_STEP], name='in2')
        target = tf.placeholder(tf.float32, shape=[None], name='target')
        # Exposed through collections so a restored graph can find them.
        tf.add_to_collection("in1", in1)
        tf.add_to_collection("in2", in2)

    with tf.name_scope("embedding"):
        embedding_weights = tf.Variable(initial_value=ew,
                                        name='embedding_weights')
        q1 = tf.nn.embedding_lookup(embedding_weights, in1, name='embed_q1')
        q2 = tf.nn.embedding_lookup(embedding_weights, in2, name='embed_q2')
    print('q2 :  %s' % (q2,))

    # Each GRU layer is applied to both questions with shared weights:
    # reuse_variables() makes the second static_rnn reuse the first's.
    with tf.variable_scope('gru1') as scope:
        x1 = tf.unstack(q1, MAX_STEP, 1)
        x2 = tf.unstack(q2, MAX_STEP, 1)
        gru_cell = tf.contrib.rnn.GRUCell(N_HIDDEN_1)
        y11, _ = tf.contrib.rnn.static_rnn(gru_cell, x1, dtype=tf.float32)
        scope.reuse_variables()
        y12, _ = tf.contrib.rnn.static_rnn(gru_cell, x2, dtype=tf.float32)

    with tf.variable_scope('gru2') as scope:
        gru_cell = tf.contrib.rnn.GRUCell(N_HIDDEN_2)
        y21, _ = tf.contrib.rnn.static_rnn(gru_cell, y11, dtype=tf.float32)
        scope.reuse_variables()
        y22, _ = tf.contrib.rnn.static_rnn(gru_cell, y12, dtype=tf.float32)

    with tf.variable_scope('gru3') as scope:
        # Only the final state of the top layer is kept.
        gru_cell = tf.contrib.rnn.GRUCell(N_HIDDEN_3)
        _, y1 = tf.contrib.rnn.static_rnn(gru_cell, y21, dtype=tf.float32)
        scope.reuse_variables()
        _, y2 = tf.contrib.rnn.static_rnn(gru_cell, y22, dtype=tf.float32)

    with tf.variable_scope('process_state'):
        # Pair features: both states, their squared difference and their
        # element-wise product.
        y_d = tf.squared_difference(y1, y2, name='h_sub_sq')
        y_cos = tf.reduce_prod(
            tf.stack(values=[y1, y2], axis=2, name='h_concat'),
            reduction_indices=2, name='h_dot')
        y = tf.concat(values=[y1, y2, y_d, y_cos], axis=1)
    print('y :  %s' % (y,))

    with tf.variable_scope('dense1') as scope:
        w = tf.Variable(
            tf.truncated_normal([4 * N_HIDDEN_3, N_HIDDEN_DENSE],
                                stddev=0.1, dtype=tf.float32),
            name='weights1')
        b = tf.Variable(tf.zeros([N_HIDDEN_DENSE], dtype=tf.float32),
                        name="bias1")
        hidden = tf.matmul(y, w) + b
    print('hidden :  %s' % (hidden,))

    with tf.variable_scope('dropout') as scope:
        keep_prob = tf.placeholder(tf.float32)
        hidden_dropout = tf.nn.dropout(hidden, keep_prob)

    with tf.variable_scope('dense2') as scope:
        w = tf.Variable(
            tf.truncated_normal([N_HIDDEN_DENSE, 1], stddev=0.1,
                                dtype=tf.float32),
            name='weights2')
        b = tf.Variable(tf.zeros([1], dtype=tf.float32), name="bias2")
        logits = tf.matmul(hidden_dropout, w) + b
        logits = tf.reshape(logits, [-1], name='logits')
    print('logits :  %s' % (logits,))

    with tf.name_scope('lr') as scope:
        cross_entropy = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=logits,
                                                    labels=target))
        print('cross_entropy :  %s' % (cross_entropy,))
        optimizer = tf.train.AdamOptimizer(1e-4)
        train_step = optimizer.minimize(cross_entropy,
                                        global_step=global_step)
        prediction = tf.sigmoid(logits, name='prediction')
        tf.add_to_collection("prediction", prediction)
        correct_prediction = tf.equal(tf.round(prediction), target)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    with tf.name_scope("summaries"):
        tf.summary.scalar("loss", cross_entropy)
        tf.summary.scalar("accuracy", accuracy)
        tf.summary.histogram("histogram_loss", cross_entropy)
        summary_op = tf.summary.merge_all()

    with tf.Session() as sess:
        print('Starting session')
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()

        if not os.path.exists(CHECKPOINT_PATH):
            os.makedirs(CHECKPOINT_PATH)
        # Resume from the latest checkpoint when one exists.
        ckpt = tf.train.get_checkpoint_state(
            os.path.dirname(CHECKPOINT_PATH))
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
        print(global_step.eval())

        print('1')
        writer = tf.summary.FileWriter(LOGDIR, sess.graph)
        try:
            print('2')
            it = PaddedDataIterator(pd.read_csv(inputs['train_file']))
            print('3')

            int_step = 0
            while epochs > 0:
                batch, epoch_complete = it.next_batch(batch_size)
                feed = {
                    in1: batch['vec1'],
                    in2: batch['vec2'],
                    target: batch['is_duplicate'],
                    keep_prob: KEEP_PROB,
                }

                if int_step % 100 == 0:
                    # NOTE(review): dropout is still active (KEEP_PROB) for
                    # this accuracy measurement -- confirm that is intended.
                    train_accuracy = sess.run(accuracy, feed_dict=feed)
                    print('Step %d: Training accuracy %g' %
                          (int_step, train_accuracy))

                    print("{} Saving checkpoint of model...".format(
                        datetime.now()))
                    checkpoint_name = os.path.join(CHECKPOINT_PATH,
                                                   'model_step')
                    saver.save(sess, checkpoint_name,
                               global_step=global_step)
                    print("{} Model checkpoint saved at {}".format(
                        datetime.now(), checkpoint_name))

                _, summary = sess.run([train_step, summary_op],
                                      feed_dict=feed)

                # Read the step counter once; it does not change between
                # logging the summary and updating int_step.
                int_step = global_step.eval()
                writer.add_summary(summary, global_step=int_step)

                epochs = epochs - epoch_complete
                if epoch_complete:
                    print('Epochs left =  %s' % (epochs,))
                    sys.stdout.flush()

            checkpoint_name = os.path.join(CHECKPOINT_PATH,
                                           'model_train.ckpt')
            saver.save(sess, checkpoint_name)
        finally:
            # Flush and close the event file even if training raises.
            writer.close()
def __init__(self, W_embedding, settings):
    """Copy hyper-parameters from `settings` and build the model graph.

    Args:
        W_embedding: array used both for the shape and the initial value of
            the trainable 'embedding' variable.
        settings: object carrying the hyper-parameters read below.
    """
    # --- hyper-parameters ------------------------------------------------
    self.model_name = settings.model_name
    self.fact_len = settings.fact_len
    self.hidden_size = settings.hidden_size
    self.num_classes = settings.num_classes
    self.filter_sizes = settings.filter_sizes
    self.kernel_size = settings.kernel_size
    self.num_filters = settings.num_filters
    self.n_filter_total = self.num_filters * len(self.filter_sizes)
    self.fc_hidden_size = settings.fc_hidden_size
    self.attn_mode = settings.attn_mode
    self.seq_encoder = settings.seq_encoder
    self.dropout = settings.dropout
    self.out_caps_num = settings.out_caps_num
    self.rout_iter = settings.rout_iter
    self.initializer = tf.contrib.layers.xavier_initializer()

    # --- bookkeeping tensors ---------------------------------------------
    self._global_step = tf.Variable(0, trainable=False, name='Global_Step')
    self.update_emas = list()
    self._tst = tf.placeholder(tf.bool)
    self._batch_size = tf.placeholder(tf.int32, [])
    self.is_train = tf.placeholder(dtype=tf.bool, name='is_train')

    with tf.name_scope('Inputs'):
        self._X_inputs = tf.placeholder(tf.int32, [None, 16, 16],
                                        name='X_input')
        self.sNum = tf.placeholder(shape=(None, ), dtype=tf.int32,
                                   name='ph_sNum')
        # shape (b_sz, sNum), e.g. [[6, 3, 5, 3], [22, 44, 22], ...]:
        # number of words in each sentence.
        self.wNum = tf.placeholder(shape=(None, 16), dtype=tf.int32,
                                   name='ph_wNum')
        self._y_inputs = tf.placeholder(tf.float32,
                                        [None, self.num_classes],
                                        name='y_input')

    with tf.variable_scope('embedding'):
        self.embedding = tf.get_variable(
            name='embedding',
            shape=W_embedding.shape,
            initializer=tf.constant_initializer(W_embedding),
            trainable=True)
    self.embedding_size = W_embedding.shape[1]

    with tf.variable_scope('Atten_TextCNN'):
        output = self._inference()
        print('output ', output)
        # The encoder output must be rank 2; only its width is needed.
        _, encoder_width = output.get_shape().as_list()

    with tf.variable_scope('fc-bn-layer'):
        fc_weights = self.weight_variable(
            [encoder_width, self.fc_hidden_size], name='Weight_fc')
        tf.summary.histogram('W_fc', fc_weights)
        fc_pre_activation = tf.matmul(output, fc_weights, name='h_fc')
        fc_offset = tf.Variable(
            tf.constant(0.1, tf.float32, shape=[self.fc_hidden_size],
                        name="beta_fc"))
        tf.summary.histogram('beta_fc', fc_offset)
        fc_normalised, fc_ema_update = self.batchnorm(
            fc_pre_activation, fc_offset, convolutional=False)
        self.update_emas.append(fc_ema_update)
        self.fc_bn_relu = tf.nn.relu(fc_normalised, name="relu")

    with tf.variable_scope('out_layer'):
        out_weights = self.weight_variable(
            [self.fc_hidden_size, self.num_classes], name='Weight_out')
        tf.summary.histogram('Weight_out', out_weights)
        out_bias = self.bias_variable([self.num_classes], name='bias_out')
        tf.summary.histogram('bias_out', out_bias)
        # Per-class scores (logits) and their sigmoid probabilities.
        self.logits = tf.nn.xw_plus_b(self.fc_bn_relu, out_weights,
                                      out_bias, name='y_pred')
        self._y_pred = tf.sigmoid(self.logits)

    with tf.name_scope('loss'):
        per_label_loss = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=self.logits, labels=self._y_inputs)
        self._loss = tf.reduce_mean(per_label_loss)
        tf.summary.scalar('loss', self._loss)

    self.saver = tf.train.Saver(max_to_keep=1)
def sigmoid_network(self, x):
    """Return the element-wise sigmoid of `x` as an op named "z1"."""
    return tf.sigmoid(x, name="z1")
def __init__(self, is_training, config):
    """Build the bit-quantized RNN language-model graph.

    Args:
        is_training: when true, dropout (if config.keep_prob < 1) and the
            optimizer ops are added; otherwise the graph stops after the
            cost and final state.
        config: object providing batch_size, num_steps, hidden_size,
            vocab_size, w_bit, f_bit, keep_prob, num_layers, max_grad_norm
            and, optionally, cell_type ('gru' by default, or 'lstm').

    Raises:
        ValueError: if config.cell_type is neither 'gru' nor 'lstm'.
    """
    self.batch_size = batch_size = config.batch_size
    self.num_steps = num_steps = config.num_steps
    size = config.hidden_size
    vocab_size = config.vocab_size

    self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
    self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])

    # A missing cell_type falls back to GRU.  An unknown value used to
    # leave `cell` unbound and fail later with an unrelated error, so it
    # is rejected here.
    cell_type = getattr(config, 'cell_type', 'gru')
    if cell_type == 'gru':
        cell = BitGRUCell(size, w_bit=config.w_bit, f_bit=config.f_bit)
    elif cell_type == 'lstm':
        cell = BitLSTMCell(size, w_bit=config.w_bit, f_bit=config.f_bit)
    else:
        raise ValueError(
            "Unsupported cell_type %r; expected 'gru' or 'lstm'."
            % (cell_type,))

    if is_training and config.keep_prob < 1:
        cell = tf.nn.rnn_cell.DropoutWrapper(
            cell, output_keep_prob=config.keep_prob)
    cell = tf.nn.rnn_cell.MultiRNNCell([cell] * config.num_layers,
                                       state_is_tuple=False)

    # The initial state is squashed and rounded to f_bit bits.
    self._initial_state = cell.zero_state(batch_size, tf.float32)
    self._initial_state = bit_utils.round_bit(
        tf.sigmoid(self._initial_state), bit=config.f_bit)

    embedding = tf.get_variable(
        "embedding", [vocab_size, size],
        initializer=tf.random_uniform_initializer())
    inputs = tf.nn.embedding_lookup(embedding, self._input_data)
    inputs = bit_utils.round_bit(tf.nn.relu(inputs), bit=config.f_bit)

    if is_training and config.keep_prob < 1:
        inputs = tf.nn.dropout(inputs, config.keep_prob)

    # One [batch_size, size] tensor per time step.
    inputs = [
        tf.squeeze(input_, [1])
        for input_ in tf.split(1, num_steps, inputs)
    ]
    outputs, state = tf.nn.rnn(cell, inputs,
                               initial_state=self._initial_state)

    output = tf.reshape(tf.concat(1, outputs), [-1, size])
    # NOTE(review): the extent of this `with` block was reconstructed from
    # whitespace-collapsed source; confirm whether softmax_b should also be
    # created inside the quantizing context.
    with bit_utils.replace_variable(
            lambda x: bit_utils.quantize_w(tf.tanh(x), bit=config.w_bit)):
        softmax_w = tf.get_variable("softmax_w", [size, vocab_size])
    softmax_b = tf.get_variable("softmax_b", [vocab_size])
    logits = tf.matmul(output, softmax_w) + softmax_b
    loss = tf.nn.seq2seq.sequence_loss_by_example(
        [logits],
        [tf.reshape(self._targets, [-1])],
        [tf.ones([batch_size * num_steps])])
    self._cost = cost = tf.reduce_sum(loss) / batch_size
    self._final_state = state

    if not is_training:
        return

    # Training-only ops: learning-rate variable, clipped gradients, Adam.
    self._lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                      config.max_grad_norm)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self._train_op = optimizer.apply_gradients(zip(grads, tvars))
import tensorflow
import numpy

# Every column but the last is a feature; the last column is the label.
xy = numpy.loadtxt('data-03-diabetes.csv', delimiter=',', dtype=numpy.float32)
print("==========================")
print(xy)
x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]

X = tensorflow.placeholder(tensorflow.float32, shape=[None, 8])
Y = tensorflow.placeholder(tensorflow.float32, shape=[None, 1])

W = tensorflow.Variable(tensorflow.random_normal([8, 1]), name='weight')
b = tensorflow.Variable(tensorflow.random_normal([1]), name='bias')

logits = tensorflow.matmul(X, W) + b
hypothesis = tensorflow.sigmoid(logits)

# Binary cross-entropy, -mean(Y*log(h) + (1-Y)*log(1-h)), computed from the
# logits so that a saturated sigmoid cannot produce log(0) = -inf / NaN.
cost = tensorflow.reduce_mean(
    tensorflow.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=logits))

train = tensorflow.train.GradientDescentOptimizer(
    learning_rate=0.01).minimize(cost)

# Threshold the probability at 0.5 and compare against the labels.
predicted = tensorflow.cast(hypothesis > 0.5, dtype=tensorflow.float32)
accuracy = tensorflow.reduce_mean(
    tensorflow.cast(tensorflow.equal(predicted, Y),
                    dtype=tensorflow.float32))

with tensorflow.Session() as session:
    session.run(tensorflow.global_variables_initializer())

    feed = {X: x_data, Y: y_data}
    for step in range(10001):
        session.run(train, feed_dict=feed)
def __call__(self, inputs, state, timestep = 0):
    """Run one step of the normalized GRU cell.

    Args:
        inputs: input tensor, multiplied by [num_inputs, ...] weights.
        state: pair whose second element is the previous hidden state;
            the first element is ignored.
        timestep: accepted but not used by this method.

    Returns:
        (h_t, (h_t, h_t)): the new hidden state and a state pair that
        carries it in both slots.
    """
    units = self._num_units
    gate_width = units * 2

    with tf.variable_scope("LN_GRU"):
        _, h_prev = state

        gate_bias = tf.get_variable("concat_bias", [gate_width], tf.float32,
                                    tf.constant_initializer(0.0))
        if self._MI:
            mi_alpha = tf.get_variable("MI_alpha", [gate_width], tf.float32,
                                       tf.constant_initializer(1.0))

        # Input contribution to both gates: normalise, then scale and shift.
        with tf.variable_scope("input_weight_matrix"):
            W_x = tf.get_variable(
                "W_x", [self._num_inputs, gate_width], tf.float32,
                tf.contrib.layers.variance_scaling_initializer())
            gamma_wx = tf.get_variable("gamma_wx", [gate_width], tf.float32,
                                       tf.constant_initializer(1.0))
            beta_wx = tf.get_variable("beta_wx", [gate_width], tf.float32,
                                      tf.constant_initializer(0.0))
            if self._MI:
                mi_beta_x = tf.get_variable("MI_beta_x", [gate_width],
                                            tf.float32,
                                            tf.constant_initializer(0.5))
            x_gates = gamma_wx * self.normalize_acts(
                tf.matmul(inputs, W_x)) + beta_wx

        # Recurrent contribution to both gates.
        with tf.variable_scope("hidden_hidden_matrix"):
            W_h = tf.get_variable("W_h", [units, gate_width], tf.float32,
                                  tf.orthogonal_initializer())
            gamma_wh = tf.get_variable("gamma_wh", [gate_width], tf.float32,
                                       tf.constant_initializer(1.0))
            beta_wh = tf.get_variable("beta_wh", [gate_width], tf.float32,
                                      tf.constant_initializer(0.0))
            if self._MI:
                mi_beta_h = tf.get_variable("MI_beta_h", [gate_width],
                                            tf.float32,
                                            tf.constant_initializer(0.5))
            h_gates = gamma_wh * self.normalize_acts(
                tf.matmul(h_prev, W_h)) + beta_wh

        # With MI the two contributions are combined multiplicatively as
        # well as additively; otherwise they are simply summed.
        if self._MI:
            gate_pre = (mi_alpha * h_gates * x_gates + mi_beta_x * x_gates +
                        mi_beta_h * h_gates + gate_bias)
        else:
            gate_pre = h_gates + x_gates + gate_bias

        update_pre, reset_pre = tf.split(gate_pre, num_or_size_splits = 2,
                                         axis = 1)
        # The forget bias shifts the update gate before the sigmoid.
        update_pre += self._forget_bias
        update_gate = tf.sigmoid(update_pre)

        with tf.variable_scope("candidate_matrix"):
            U_h = tf.get_variable("U_h", [units, units], tf.float32,
                                  tf.orthogonal_initializer())
            U_x = tf.get_variable(
                "U_x", [self._num_inputs, units], tf.float32,
                tf.contrib.layers.variance_scaling_initializer())
            gamma_uh = tf.get_variable("gamma_uh", [units], tf.float32,
                                       tf.constant_initializer(1.0))
            beta_uh = tf.get_variable("beta_uh", [units], tf.float32,
                                      tf.constant_initializer(0.0))
            gamma_ux = tf.get_variable("gamma_ux", [units], tf.float32,
                                       tf.constant_initializer(1.0))
            beta_ux = tf.get_variable("beta_ux", [units], tf.float32,
                                      tf.constant_initializer(0.0))
            if self._MI:
                mi_alpha_hat = tf.get_variable(
                    "mi_alpha_hat", [units], tf.float32,
                    tf.constant_initializer(1.0))
                mi_beta_hat_h = tf.get_variable(
                    "mi_beta_hat_h", [units], tf.float32,
                    tf.constant_initializer(0.5))
                mi_beta_hat_x = tf.get_variable(
                    "mi_beta_hat_x", [units], tf.float32,
                    tf.constant_initializer(0.5))
                mi_beta_hat = tf.get_variable(
                    "mi_beta_hat", [units], tf.float32,
                    tf.constant_initializer(0.0))

            x_candidate = gamma_ux * self.normalize_acts(
                tf.matmul(inputs, U_x)) + beta_ux
            h_candidate = gamma_uh * self.normalize_acts(
                tf.matmul(h_prev, U_h)) + beta_uh
            # The reset gate scales only the recurrent candidate term.
            gated_h_candidate = tf.sigmoid(reset_pre) * h_candidate

            if self._MI:
                h_proposed = self._activation(
                    mi_alpha_hat * x_candidate * gated_h_candidate +
                    mi_beta_hat_h * gated_h_candidate +
                    mi_beta_hat_x * x_candidate + mi_beta_hat)
            else:
                h_proposed = self._activation(x_candidate +
                                              gated_h_candidate)

        # Interpolate between the previous state and the proposal.
        h_t = update_gate * h_prev + (1. - update_gate) * h_proposed

    return h_t, (h_t, h_t)
def main(_):
    """Train a one-hidden-layer network with one softmax head per plot.

    Reads num_crops, num_plots and data_dir from FLAGS, trains for EPOCHS
    epochs of NUM_BATCHES mini-batches with an exponentially decayed Adam
    learning rate, saves a checkpoint per epoch, prints the predicted
    distributions for a constant probe input and plots the loss curve.
    """
    num_crops = FLAGS.num_crops
    num_plots = FLAGS.num_plots

    # Column range [lo, hi) of each plot's block in the output layer.
    plot_bounds = [(i * num_crops, (i + 1) * num_crops)
                   for i in range(num_plots)]

    # Create the model
    x = tf.placeholder(tf.float32, [None, num_crops])
    # NOTE(review): all-zero weights make every hidden unit start (and
    # train) identically -- consider a random initializer.
    W1 = tf.Variable(tf.zeros([num_crops, LAYER_SIZE]))
    b1 = tf.Variable(tf.zeros([LAYER_SIZE]))
    y1 = tf.sigmoid(tf.matmul(x, W1) + b1)

    W3 = tf.Variable(tf.zeros([LAYER_SIZE, num_crops * num_plots]))
    b3 = tf.Variable(tf.zeros([num_crops * num_plots]))
    y = tf.matmul(y1, W3) + b3

    probabilities = [tf.nn.softmax(y[:, lo:hi]) for lo, hi in plot_bounds]

    y_ = tf.placeholder(tf.float32, [None, num_crops * num_plots])

    # Mean over plots of each plot's mean softmax cross-entropy.
    per_plot_losses = [
        tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                labels=y_[:, lo:hi], logits=y[:, lo:hi]))
        for lo, hi in plot_bounds
    ]
    # tf.multiply replaces tf.mul, which was removed in TensorFlow 1.0.
    cross_entropy_total = tf.multiply(tf.add_n(per_plot_losses),
                                      1.0 / num_plots)

    lr = tf.Variable(0.0, trainable=False)
    train_step = tf.train.AdamOptimizer(lr).minimize(cross_entropy_total)

    # Built once, outside the training loop, so the graph does not grow by
    # a fresh set of ops every epoch.
    lr_value = tf.placeholder(tf.float32, shape=[])
    set_lr = tf.assign(lr, lr_value)
    # NOTE(review): argmax runs over the whole concatenated output, not per
    # plot -- confirm that is the intended accuracy definition.
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Train
    points = []
    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        saver = tf.train.Saver(tf.all_variables())

        inputs, outputs = load_data(FLAGS.data_dir, FLAGS.num_crops,
                                    'data_train.csv', num_plots)
        eval_inputs, eval_outputs = load_data(FLAGS.data_dir,
                                              FLAGS.num_crops,
                                              'data_eval.csv', num_plots)
        batch_size = int(len(inputs) / float(NUM_BATCHES))

        for i in range(EPOCHS):
            sess.run(set_lr,
                     feed_dict={lr_value: LEARNING_RATE * (DECAY ** i)})
            for j in range(NUM_BATCHES):
                batch_xs = inputs[j * batch_size:(j + 1) * batch_size, :]
                batch_ys = outputs[j * batch_size:(j + 1) * batch_size, :]
                _, loss = sess.run([train_step, cross_entropy_total],
                                   feed_dict={x: batch_xs, y_: batch_ys})
                points.append([i * NUM_BATCHES + j, loss])

            # Evaluated every epoch as before; the value is not reported.
            acc = sess.run(accuracy,
                           feed_dict={x: eval_inputs, y_: eval_outputs})
            print("Epoch " + str(i) + " has loss " + str(loss))
            saver.save(sess, "save/model.ckpt", global_step=i)

        # Constant probe rows, sized to the model's input width instead of
        # a hard-coded 5 columns.
        probe_high = [[3.2] * num_crops]
        probe_low = [[1] * num_crops]
        probability = sess.run(probabilities, feed_dict={x: probe_high})
        probability2 = sess.run(probabilities, feed_dict={x: probe_low})
        logits = sess.run(y, feed_dict={x: probe_high})

        for plot_probs in probability:
            print(",".join(map(str, plot_probs.tolist()[0])))

    # Loss curve: global batch index against mini-batch loss.
    points = np.array(points)
    plt.plot(points[:, 0], points[:, 1], linewidth=2.0)
    plt.show()
def __init__(self, logits):
    """Store `logits` and their element-wise sigmoid.

    Args:
        logits: tensor of unnormalised scores.
    """
    # `ps` is the sigmoid of the logits.
    self.ps = tf.sigmoid(logits)
    self.logits = logits
def __init__(self, sequence_length, num_classes_list, total_classes, vocab_size, fc_hidden_size, embedding_size, embedding_type, l2_reg_lambda=0.0, pretrained_embedding=None):
    """Build a three-level hierarchical multi-label classifier graph.

    The averaged word embedding feeds the first level; each later level
    receives the previous level's sigmoid scores concatenated with that
    same averaged embedding.

    Args:
        sequence_length: number of token ids per input row.
        num_classes_list: class counts for levels one, two and three
            (indices 0..2 are read).
        total_classes: width of the `input_y` placeholder.
        vocab_size: rows of the randomly initialised embedding.
        fc_hidden_size: width of each level's hidden layer.
        embedding_size: embedding width.
        embedding_type: with a pretrained embedding, 0 keeps it constant
            and 1 makes it trainable.
        l2_reg_lambda: multiplier for the L2 penalty over all trainable
            variables.
        pretrained_embedding: optional initial embedding matrix; when None
            a uniform random trainable embedding is created.

    Raises:
        ValueError: if a pretrained embedding is given and embedding_type
            is neither 0 nor 1.
    """

    def _fc_params(in_dim, out_dim):
        """Create the W and b variables of one dense layer in the current name scope."""
        W = tf.Variable(tf.truncated_normal(shape=[in_dim, out_dim],
                                            stddev=0.1, dtype=tf.float32),
                        name="W")
        b = tf.Variable(tf.constant(value=0.1, shape=[out_dim],
                                    dtype=tf.float32),
                        name="b")
        return W, b

    # Placeholders for input, output, dropout_prob and training_tag
    self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")
    self.input_y_first = tf.placeholder(tf.float32, [None, num_classes_list[0]], name="input_y_first")
    self.input_y_second = tf.placeholder(tf.float32, [None, num_classes_list[1]], name="input_y_second")
    self.input_y_third = tf.placeholder(tf.float32, [None, num_classes_list[2]], name="input_y_third")
    self.input_y = tf.placeholder(tf.float32, [None, total_classes], name="input_y")
    self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
    self.is_training = tf.placeholder(tf.bool, name="is_training")

    self.global_step = tf.Variable(0, trainable=False, name="Global_Step")

    # Embedding Layer
    with tf.device("/cpu:0"), tf.name_scope("embedding"):
        if pretrained_embedding is None:
            # Default: randomly initialised, trainable word vectors.
            self.embedding = tf.Variable(
                tf.random_uniform([vocab_size, embedding_size],
                                  minval=-1.0, maxval=1.0,
                                  dtype=tf.float32),
                trainable=True, name="embedding")
        elif embedding_type == 0:
            # Frozen pretrained vectors.
            self.embedding = tf.constant(pretrained_embedding,
                                         dtype=tf.float32, name="embedding")
        elif embedding_type == 1:
            # Pretrained vectors that are fine-tuned.
            self.embedding = tf.Variable(pretrained_embedding,
                                         trainable=True, dtype=tf.float32,
                                         name="embedding")
        else:
            # Any other value used to leave self.embedding unset and fail
            # on the lookup below with an unrelated AttributeError.
            raise ValueError(
                "embedding_type must be 0 or 1 when pretrained_embedding "
                "is given, got %r" % (embedding_type,))
        self.embedded_sentence = tf.nn.embedding_lookup(self.embedding, self.input_x)

    # Average Vectors
    # [batch_size, embedding_size]
    self.embedded_sentence_average = tf.reduce_mean(self.embedded_sentence, axis=1)

    # First Level
    with tf.name_scope("first-fc"):
        W, b = _fc_params(embedding_size, fc_hidden_size)
        self.first_fc = tf.nn.xw_plus_b(self.embedded_sentence_average, W, b)
        self.first_fc_out = tf.nn.relu(self.first_fc, name="relu")

    with tf.name_scope("first-output"):
        W, b = _fc_params(fc_hidden_size, num_classes_list[0])
        self.first_logits = tf.nn.xw_plus_b(self.first_fc_out, W, b, name="logits")
        self.first_scores = tf.sigmoid(self.first_logits, name="scores")

    # Second Level
    with tf.name_scope("second-fc"):
        self.second_input = tf.concat([self.first_scores, self.embedded_sentence_average], axis=1)
        W, b = _fc_params(num_classes_list[0] + embedding_size, fc_hidden_size)
        self.second_fc = tf.nn.xw_plus_b(self.second_input, W, b)
        self.second_fc_out = tf.nn.relu(self.second_fc, name="relu")

    with tf.name_scope("second-output"):
        W, b = _fc_params(fc_hidden_size, num_classes_list[1])
        self.second_logits = tf.nn.xw_plus_b(self.second_fc_out, W, b, name="logits")
        self.second_scores = tf.sigmoid(self.second_logits, name="scores")

    # Third Level
    with tf.name_scope("third-fc"):
        self.third_input = tf.concat([self.second_scores, self.embedded_sentence_average], axis=1)
        W, b = _fc_params(num_classes_list[1] + embedding_size, fc_hidden_size)
        self.third_fc = tf.nn.xw_plus_b(self.third_input, W, b)
        self.third_fc_out = tf.nn.relu(self.third_fc, name="relu")

    with tf.name_scope("third-output"):
        W, b = _fc_params(fc_hidden_size, num_classes_list[2])
        self.third_logits = tf.nn.xw_plus_b(self.third_fc_out, W, b, name="logits")
        self.third_scores = tf.sigmoid(self.third_logits, name="scores")

    # Final scores
    with tf.name_scope("output"):
        self.scores = tf.concat([self.first_scores, self.second_scores, self.third_scores], axis=1, name="scores")

    # Calculate mean cross-entropy loss, L2 loss
    with tf.name_scope("loss"):
        def cal_loss(labels, logits, name):
            # Sum the per-class losses of each example, then average over
            # the batch.
            losses = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits)
            losses = tf.reduce_mean(tf.reduce_sum(losses, axis=1), name=name + "sigmoid_losses")
            return losses

        # Loss
        losses_1 = cal_loss(labels=self.input_y_first, logits=self.first_logits, name="first_")
        losses_2 = cal_loss(labels=self.input_y_second, logits=self.second_logits, name="second_")
        losses_3 = cal_loss(labels=self.input_y_third, logits=self.third_logits, name="third_")
        losses = tf.add_n([losses_1, losses_2, losses_3], name="losses")

        # L2 Loss over every trainable variable, scaled by l2_reg_lambda.
        l2_losses = tf.add_n([tf.nn.l2_loss(tf.cast(v, tf.float32)) for v in tf.trainable_variables()],
                             name="l2_losses") * l2_reg_lambda
        self.loss = tf.add_n([losses, l2_losses], name="loss")
output = tf.layers.dense(output, 32)
logits = tf.layers.dense(output, 1)

# Compute the loss.
with tf.variable_scope("loss"):
    loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_,
                                                logits=logits))

# Choose the optimizer.
with tf.variable_scope("train_step"):
    global_step = tf.Variable(0, name="global_step", trainable=False)
    global_add = global_step.assign_add(1)  # used as a step counter
    train_op = tf.train.AdamOptimizer(params.lr).minimize(loss)

# Accuracy / F1 / precision / recall.
with tf.variable_scope("evaluation"):
    # A probability above 0.26 counts as a positive prediction.
    pred = tf.cast(tf.greater(tf.sigmoid(logits), 0.26), tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(pred, labels_), tf.float32),
                              name="accuracy")

    # Confusion matrix (rows = true label, columns = prediction):
    #            pred 0 | pred 1
    #   true 0     TN   |   FP
    #   true 1     FN   |   TP
    true = tf.reshape(labels_, (-1,))
    pred = tf.reshape(pred, (-1,))
    epsilon = 1e-7
    # Cast the counts to float *before* dividing: integer tensors floor-
    # divide under Python 2.  epsilon keeps the ratios finite when there
    # are no predicted or no actual positives.
    cm = tf.cast(
        tf.contrib.metrics.confusion_matrix(true, pred, num_classes=2),
        tf.float32)
    true_positives = cm[1][1]
    precision = tf.identity(
        true_positives / (tf.reduce_sum(cm[:, 1]) + epsilon),
        name="precision")
    recall = tf.identity(
        true_positives / (tf.reduce_sum(cm[1], axis=0) + epsilon),
        name="recall")
    f1_score = tf.identity(
        2 * precision * recall / (precision + recall + epsilon),
        name="f1_score")
def main():
    """Build the GAN graph, restore pretrained weights, train, and export songs.

    Command-line arguments (read from ``sys.argv``):
        1: path to the directory containing the music data set.
        2: parent directory in which a timestamped checkpoint directory is created.
        3: parent directory in which a timestamped generated-song directory is created.

    During training a checkpoint is saved and one song per generator batch
    entry is exported at the first step of every epoch and at the very last
    training step.  After training, one more batch of songs is exported.

    Raises:
        SystemExit: if fewer than three command-line arguments are given.
        FileNotFoundError: if no checkpoint is found in the hard-coded
            checkpoint directory.
        ValueError: if the data set is too small for a single training step.
    """
    # Fail fast, before the (slow) graph construction, when arguments are missing.
    if len(sys.argv) < 4:
        raise SystemExit("usage: " + sys.argv[0] +
                         " DATA_DIR MODELS_DIR SONGS_DIR")

    tf.reset_default_graph()
    print("\n\n")
    print("Defining placeholders...")
    input_genre = tf.placeholder(dtype=tf.int32, shape=GENERATOR_BATCH_SIZE)
    latent_vector = tf.placeholder(dtype=tf.float32,
                                   shape=[GENERATOR_BATCH_SIZE, LATENT_SIZE])
    real_data = tf.placeholder(dtype=tf.bool,
                               shape=[
                                   REAL_DATA_BATCH_SIZE, NUM_BARS,
                                   BEATS_PER_BAR, NUM_NOTES, NUM_TRACKS
                               ])
    # NOTE: real_data is rebound to the float cast, so the feed_dict below
    # feeds the cast tensor rather than the bool placeholder.
    real_data = tf.cast(real_data, tf.float32)
    discriminator_labels_real = tf.placeholder(dtype=tf.float32,
                                               shape=REAL_DATA_BATCH_SIZE)
    discriminator_labels_fake = tf.placeholder(dtype=tf.float32,
                                               shape=GENERATOR_BATCH_SIZE)

    print("Constructing Model...")
    generator_out = Generator(input_genre, latent_vector, LATENT_SIZE,
                              NUM_TRACKS, NUM_CLASSES)
    # The discriminator is applied twice with shared weights.
    with tf.variable_scope('Discriminator') as scope:
        fake_out = Discriminator(generator_out, NUM_TRACKS)
        scope.reuse_variables()
        real_out = Discriminator(real_data, NUM_TRACKS)
    # NOTE(review): assumed to sit outside the reusing 'Discriminator' scope
    # (the original nesting is not recoverable) -- confirm.
    classifier_out, _, _, _ = Classifier(generator_out, NUM_TRACKS,
                                         NUM_CLASSES)

    print("\n\n")
    print("Generator out: ", generator_out)
    print("Fake out: ", fake_out)
    print("Real out: ", real_out)
    print("Classifier out: ", classifier_out)
    print("\n\n")

    # Mean squared distance of the generator output from 0.5.
    variance = tf.reduce_mean(tf.square(tf.subtract(generator_out, 0.5)),
                              [0, 1, 2, 3, 4])
    classifier_labels = tf.one_hot(input_genre, NUM_CLASSES)

    # Streaming accuracy metrics; only the update ops are run below.
    _, acc_op = tf.metrics.accuracy(input_genre,
                                    tf.argmax(classifier_out, 1))
    _, acc_disc = tf.metrics.accuracy(
        tf.concat([
            tf.ones(fake_out.get_shape(), dtype=tf.int32),
            tf.zeros(real_out.get_shape(), dtype=tf.int32)
        ],
                  axis=-1),
        tf.round(tf.sigmoid(tf.concat([fake_out, real_out], axis=-1))))

    # Loss functions.
    generator_loss, discriminator_loss = VAC_GAN_loss(
        fake_out, real_out, classifier_out, classifier_labels,
        discriminator_labels_real, discriminator_labels_fake, variance)

    # Trainable variables, selected by (case-sensitive) substring of the name.
    # NOTE(review): the variable scope is 'Discriminator' (capital D) while the
    # filter is lowercase; this only matches if the layer names themselves
    # contain 'discriminator' -- confirm.
    trainable = tf.trainable_variables()
    generator_varlist = [v for v in trainable if "generator" in v.name]
    discriminator_varlist = [
        v for v in trainable if "discriminator" in v.name
    ]

    generator_optim = tf.train.AdamOptimizer(
        learning_rate=GENERATOR_LEARNING_RATE,
        beta1=BETA_1,
        beta2=BETA_2,
        epsilon=1e-08,
        use_locking=False,
        name='Generator_Optimizer').minimize(generator_loss,
                                             var_list=generator_varlist)
    discriminator_optim = tf.train.AdamOptimizer(
        learning_rate=DISCRIMINATOR_LEARNING_RATE,
        beta1=BETA_1,
        beta2=BETA_2,
        epsilon=1e-08,
        use_locking=False,
        name='Discriminator_Optimizer').minimize(
            discriminator_loss, var_list=discriminator_varlist)

    # Built once, outside the training loop: creating this op at every export
    # would add new nodes to the graph each time.
    binarized_music = tf.cast(tf.round(generator_out), tf.bool)

    genre_names = {0: 'alternative', 1: 'rock', 2: 'classic'}

    def optimistic_restore(session, save_file, graph=None):
        """Restore only the variables whose name and shape match the checkpoint."""
        if graph is None:
            graph = tf.get_default_graph()
        reader = tf.train.NewCheckpointReader(save_file)
        saved_shapes = reader.get_variable_to_shape_map()
        var_names = sorted((var.name, var.name.split(':')[0])
                           for var in tf.global_variables()
                           if var.name.split(':')[0] in saved_shapes)
        restore_vars = []
        for var_name, saved_var_name in var_names:
            curr_var = graph.get_tensor_by_name(var_name)
            if curr_var.get_shape().as_list() == saved_shapes[saved_var_name]:
                restore_vars.append(curr_var)
        tf.train.Saver(restore_vars).restore(session, save_file)

    def export_songs(music, genres, epoch_index, songs_directory):
        """Write every phrase of the batch to songs_directory as an npz file."""
        for jj in range(GENERATOR_BATCH_SIZE):
            # Unknown ids get a placeholder name instead of an unbound or
            # stale value left over from a previous entry.
            genre_name = genre_names.get(int(genres[jj]), 'unknown')
            convert_to_npz(music[jj, :, :, :, :], songs_directory,
                           (str(jj) + '_' + genre_name + '_EPOCH_' +
                            str(epoch_index)))

    # Initialize variables.
    print("Initialising session...")
    init_g = tf.global_variables_initializer()
    init_l = tf.local_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_g)
        sess.run(init_l)
        saver = tf.train.Saver()

        # Load in the classifier weights.
        checkpoint_path = r'/home/cofphe/Documents/jacob-luka/Models/saved_models_2018-12-13_00:29:45'
        checkpoint_file = tf.train.latest_checkpoint(checkpoint_path)
        if checkpoint_file is None:
            raise FileNotFoundError("No checkpoint found in " +
                                    checkpoint_path)
        optimistic_restore(sess, checkpoint_file)

        data_path = abspath(sys.argv[1])
        print("Loading in Data from: ", data_path)
        data = Data(data_path)  # Path to directory containing music set

        models_directory = join(
            sys.argv[2],
            ("saved_models_" + datetime.now().strftime('%Y-%m-%d_%H:%M:%S')))
        os.makedirs(models_directory, exist_ok=True)
        songs_directory = join(
            sys.argv[3],
            ("generated_song_" + datetime.now().strftime('%Y-%m-%d_%H:%M:%S')))
        os.makedirs(songs_directory, exist_ok=True)

        # One batch is drawn before the loop, as in the original; kept so the
        # data iterator is advanced exactly as before.
        genre_batch = data.get_genre()
        data_batch = data.get_batch()
        latent_batch = data.get_noise()

        batches_per_epoch = int(data.num_examples / REAL_DATA_BATCH_SIZE)
        total_steps = batches_per_epoch * GAN_EPOCHS
        if total_steps < 1:
            raise ValueError("Not enough data for a single training step")

        progress = trange(total_steps, desc='Bar_desc', leave=True)
        for t in progress:
            genre_batch = data.get_genre()
            data_batch = data.get_batch()
            latent_batch = data.get_noise()
            (discriminator_labels_real_batch,
             discriminator_labels_fake_batch) = data.get_labels()
            feed = {
                input_genre: genre_batch,
                latent_vector: latent_batch,
                real_data: data_batch,
                discriminator_labels_real: discriminator_labels_real_batch,
                discriminator_labels_fake: discriminator_labels_fake_batch
            }

            if t % G_D_ASPECT_RATIO == 0:
                # Update both the generator and the discriminator.
                (loss_generator, _, loss_discriminator, _, class_acc,
                 disc_acc, variance_batch) = sess.run([
                     generator_loss, generator_optim, discriminator_loss,
                     discriminator_optim, acc_op, acc_disc, variance
                 ],
                                                      feed_dict=feed)
            else:
                # Generator-only step; loss_discriminator keeps the value
                # from the last joint step (t == 0 is always a joint step).
                (loss_generator, _, class_acc, disc_acc,
                 variance_batch) = sess.run([
                     generator_loss, generator_optim, acc_op, acc_disc,
                     variance
                 ],
                                            feed_dict=feed)

            progress.set_description('GEN LOSS ===> ' + str(loss_generator) +
                                     ' DIS LOSS ===> ' +
                                     str(loss_discriminator) +
                                     ' CLASS ACC ===> ' + str(class_acc) +
                                     ' DISC ACC ===> ' + str(disc_acc) +
                                     ' VAR ===> ' + str(variance_batch))
            progress.refresh()
            with open(join(models_directory, "Model.csv"), 'a') as f:
                f.write(
                    str(loss_generator) + "," + str(loss_discriminator) +
                    "," + str(class_acc) + "," + str(disc_acc) + "," +
                    str(variance_batch) + "\n")

            # The last loop index is total_steps - 1; the original compared
            # against total_steps, so the final model was never saved.
            if t % batches_per_epoch == 0 or t == total_steps - 1:
                print("Epoch Completed")
                print("Making Music")
                # NOTE(review): BATCH_SIZE (not REAL_DATA_BATCH_SIZE) is used
                # for the epoch fraction in the file name -- confirm.
                filename = "model-" + str(
                    (t * BATCH_SIZE) / data.num_examples) + "-" + str(class_acc)
                saver.save(sess, join(models_directory, filename))
                generated_music, generated_genre = sess.run(
                    [binarized_music, input_genre], feed_dict=feed)
                print("NUM NOTES", np.sum(generated_music))
                export_songs(generated_music, generated_genre,
                             int(t / batches_per_epoch), songs_directory)

        # Final export after training, using the last batch fed to the graph.
        # NOTE(review): assumed to run once after the loop (the original
        # nesting is not recoverable) -- confirm.
        generator_out_batch, generated_music, generated_genre = sess.run(
            [generator_out, binarized_music, input_genre], feed_dict=feed)
        print(generator_out_batch[0, 1, :, :, 1])
        print('\n\n')
        print(generated_music[0, 1, :, :, 1])
        export_songs(generated_music, generated_genre,
                     int(t / batches_per_epoch), songs_directory)
# https://stats.stackexchange.com/questions/7440/kl-divergence-between-two-univariate-gaussians kl_div = -0.5 * (log_var + 1 - mu**2 - tf.exp(log_var)) kl_div = tf.reduce_sum(kl_div) / x.shape[0] loss = rec_loss + 1. * kl_div grads = tape.gradient(loss, model.trainable_variables) optimizer.apply_gradients(zip(grads, model.trainable_variables)) if step % 100 == 0: print(epoch, step, 'kl div:', float(kl_div), 'rec loss:', float(rec_loss)) # evaluation z = tf.random.normal((batchsz, z_dim)) logits = model.decoder(z) x_hat = tf.sigmoid(logits) x_hat = tf.reshape(x_hat, [-1, 28, 28]).numpy() *255. x_hat = x_hat.astype(np.uint8) save_images(x_hat, 'vae_images/sampled_epoch%d.png'%epoch) x = next(iter(test_db)) x = tf.reshape(x, [-1, 784]) x_hat_logits, _, _ = model(x) x_hat = tf.sigmoid(x_hat_logits) x_hat = tf.reshape(x_hat, [-1, 28, 28]).numpy() *255. x_hat = x_hat.astype(np.uint8) save_images(x_hat, 'vae_images/rec_epoch%d.png'%epoch)
def Generator(input_genre, latent_vector, LATENT_SIZE, NUM_TRACKS, NUM_CLASSES):
    """Build the genre-conditioned generator and return its sigmoid output.

    The genre id is embedded into a LATENT_SIZE vector and multiplied
    element-wise with the latent vector.  The product goes through a shared
    stack of transposed 3-D convolutions, then through two private towers per
    track ("pitch-time" and "time-pitch") whose outputs are concatenated on
    the last axis and merged by a 1x1x1 transposed convolution.  The
    per-track results are concatenated on the last axis and squashed with a
    sigmoid.

    Args:
        input_genre: genre ids fed to the embedding layer.
        latent_vector: latent noise, multiplied with the genre embedding.
        LATENT_SIZE: embedding dimension.
        NUM_TRACKS: number of private per-track generators to build.
        NUM_CLASSES: number of genres (embedding vocabulary size).

    Returns:
        The sigmoid of the concatenated per-track outputs.
    """
    # NOTE(review): no `training` argument is passed to any
    # tf.layers.batch_normalization call, so the library default applies --
    # confirm that is the intended mode during GAN training.

    def leaky_relu_half(features):
        return tf.nn.leaky_relu(features, alpha=.5)

    # (filters, kernel_size, strides) of the shared transposed convolutions.
    shared_specs = (
        (64, (2, 1, 1), (1, 1, 1)),
        (32, (1, 4, 1), (1, 4, 1)),
        (32, (1, 1, 3), (1, 1, 3)),
        (16, (1, 4, 1), (1, 4, 1)),
        (16, (1, 1, 3), (1, 1, 2)),
    )
    # (filters, kernel_size) per stage of each private tower; strides equal
    # the kernel size.
    tower_specs = (
        ('pt', ((16, (1, 1, 12)), (8, (1, 6, 1)))),
        ('tp', ((16, (1, 6, 1)), (8, (1, 1, 12)))),
    )

    # Class embedding layer.
    genre_embedding = tf.keras.layers.Embedding(
        NUM_CLASSES,
        LATENT_SIZE,
        embeddings_initializer='glorot_normal',
        name='generator_embedding')
    embedded_genre = genre_embedding(input_genre)
    class_input = tf.multiply(latent_vector,
                              embedded_genre,
                              name='generator_multiply')
    bn_0 = tf.layers.batch_normalization(class_input,
                                         name='generator_batch_norm_0')

    def shared_generator(conditioned):
        """Dense layer, reshape, then the shared transposed-conv stack."""
        net = tf.layers.dense(conditioned, (3 * 128),
                              activation=leaky_relu_half,
                              name='generator_dense_1')
        net = tf.layers.batch_normalization(net,
                                            name='generator_batch_norm_1')
        net = tf.reshape(net, [-1, 3, 1, 1, 128], name='generator_reshape_1')
        for index, (filters, kernel, strides) in enumerate(shared_specs,
                                                           start=1):
            net = tf.layers.conv3d_transpose(
                net,
                filters,
                kernel,
                strides,
                activation=leaky_relu_half,
                name='generator_transconv3d_%d' % index)
            # Batch-norm numbering runs one ahead of the conv numbering.
            net = tf.layers.batch_normalization(
                net, name='generator_batch_norm_%d' % (index + 1))
        return net

    def private_tower(features, tag, stages, track_num):
        """Two transposed-conv + batch-norm stages private to one track."""
        net = features
        for stage, (filters, kernel) in enumerate(stages, start=1):
            net = tf.layers.conv3d_transpose(
                net,
                filters,
                kernel,
                kernel,
                activation=leaky_relu_half,
                name=('generator_%s_conv3d_%d' % (tag, stage) +
                      str(track_num)))
            net = tf.layers.batch_normalization(
                net,
                name=('generator_%s_bn_%d' % (tag, stage) + str(track_num)))
        return net

    def merged_private(private_features, track_num):
        """Collapse the concatenated towers to a single channel."""
        merged = tf.layers.conv3d_transpose(
            private_features,
            1, (1, 1, 1), (1, 1, 1),
            name=('generator_merged_conv3d' + str(track_num)))
        return tf.layers.batch_normalization(
            merged, name=('generator_merged_bn' + str(track_num)))

    # The shared stack consumes the batch-normalised class input.
    shared_out = shared_generator(bn_0)

    # Build the private generators for every track and concatenate them.
    private_out = []
    for track in range(NUM_TRACKS):
        towers = [
            private_tower(shared_out, tag, stages, track)
            for tag, stages in tower_specs
        ]
        private_out.append(merged_private(tf.concat(towers, -1), track))

    generator_out = tf.sigmoid(tf.concat(private_out, -1))
    return generator_out
def attention_decoder(decoder_inputs,
                      initial_state,
                      encoder_states,
                      cell,
                      encoder_section_states=None,
                      num_words_section=None,
                      enc_padding_mask=None,
                      enc_section_padding_mask=None,
                      initial_state_attention=False,
                      pointer_gen=True,
                      use_coverage=False,
                      prev_coverage=None,
                      temperature=None):
  """Attention decoder with optional hierarchical (section) attention and coverage.

  Args:
    decoder_inputs: A list of 2D Tensors [batch_size x input_size].
    initial_state: 2D Tensor [batch_size x cell.state_size].
    encoder_states: 3D Tensor [batch_size x seq_len x encoder_output_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    encoder_section_states: 3D Tensor
      [batch_size x section_seq_len x encoder_output_size].
      Pass None if you don't want hierarchical attentive decoding.
    num_words_section: number of words per section
      [batch_size x section_seq_len].
    enc_padding_mask: 2D Tensor [batch_size x attn_length] containing 1s and
      0s; indicates which of the encoder locations are padding (0) or a real
      token (1).
    enc_section_padding_mask: 3D Tensor
      [batch_size x num_sections x section_len]. When given, it replaces
      enc_padding_mask (flattened to 2D) inside the masked softmax.
    initial_state_attention: Note that this attention decoder passes each
      decoder input through a linear layer with the previous step's context
      vector to get a modified version of the input. If
      initial_state_attention is False, on the first decoder step the
      "previous context vector" is just a zero vector. If
      initial_state_attention is True, we use initial_state to (re)calculate
      the previous step's context vector. We set this to False for train/eval
      mode (because we call attention_decoder once for all decoder steps) and
      True for decode mode (because we call attention_decoder once for each
      decoder step).
    pointer_gen: boolean. If True, calculate the generation probability p_gen
      for each decoder step.
    use_coverage: boolean. If True, use coverage mechanism.
    prev_coverage: If not None, a tensor with shape (batch_size, seq_len). The
      previous step's coverage vector. This is only not None in decode mode
      when using coverage.
    temperature: simulating the temperature hyperparam for softmax: set to
      1.0 for starters. Only used with hierarchical attention, where the
      word-level scores are multiplied by it when it is greater than 0.
      None (the default) disables the scaling.

  Returns:
    outputs: A list of the same length as decoder_inputs of 2D Tensors of
      shape [batch_size x cell.output_size]. The output vectors.
    state: The final state of the decoder. A tensor shape
      [batch_size x cell.state_size].
    attn_dists: A list containing tensors of shape (batch_size,seq_len).
      The attention distributions for each decoder step.
    p_gens: List of length input_size, containing tensors of shape
      [batch_size, 1]. The values of p_gen for each decoder step. Empty list
      if pointer_gen=False.
    coverage: Coverage vector on the last step computed. None if
      use_coverage=False.
    attn_dists_sec_list: A list with the section-level attention distribution
      of each decoder step. Empty list when encoder_section_states is None.

  Raises:
    ValueError: if the input size of a decoder input cannot be inferred.
  """
  print('encoder_states.shape', encoder_states.shape)
  print('decoder_inputs[0].shape', decoder_inputs[0].shape)
  with variable_scope.variable_scope("attention_decoder"):
    # if this line fails, it's because the batch size isn't defined
    batch_size = encoder_states.get_shape()[0].value
    # encoder state size, if this line fails, it's because the attention
    # length isn't defined
    enc_output_size = encoder_states.get_shape()[2].value

    # Indicator variable for hierarchical attention
    hier = encoder_section_states is not None

    # Reshape encoder_states (need to insert a dim)
    # now is shape (batch_size, attn_len, 1, enc_output_size)
    encoder_states = tf.expand_dims(encoder_states, axis=2)

    # To calculate attention, we calculate
    #   v^T tanh (W_h h_i + W_s s_t + b_attn)
    # where h_i is an encoder state, and s_t a decoder state.
    # attn_vec_size is the length of the vectors v, b_attn, (W_h h_i) and
    # (W_s s_t).
    # (W_h h_i) is encoder_features, (W_s s_t) + b_att is decoder_features
    # We set it to be equal to the size of the encoder states.
    attention_vec_size = enc_output_size

    # Get the weight matrix W_h and apply it to each encoder state to get
    # (W_h h_i), the encoder features.
    # To multiply batch_size number of time_step sizes of encoder states
    # by W_h, we can use conv2d with stride of 1
    W_h = variable_scope.get_variable(
        "W_h", [1, 1, enc_output_size, attention_vec_size])
    # shape (batch_size,seq_len,1,attention_vec_size)
    encoder_features = nn_ops.conv2d(encoder_states, W_h, [1, 1, 1, 1],
                                     "SAME")

    if hier:
      enc_sec_output_size = encoder_section_states.get_shape()[2].value
      encoder_section_states = tf.expand_dims(encoder_section_states, axis=2)
      W_h_s = variable_scope.get_variable(
          "W_h_s", [1, 1, enc_sec_output_size, attention_vec_size])
      # shape (batch_size,seq_len,1,attention_vec_size)
      encoder_section_features = nn_ops.conv2d(encoder_section_states, W_h_s,
                                               [1, 1, 1, 1], "SAME")
      v_sec = variable_scope.get_variable("v_sec", [attention_vec_size])

    # Get the weight vectors v and w_c (w_c is for coverage)
    # v^T tanh (W_h h_i + W_s s_t + W_c c_t + b_attn)
    # c_t = \sum_{i=1}^{t-1} a^i (sum of all attention weights in the
    # previous step) shape=(batch_size, seq_len)
    v = variable_scope.get_variable("v", [attention_vec_size])
    if use_coverage:
      with variable_scope.variable_scope("coverage"):
        w_c = variable_scope.get_variable("w_c",
                                          [1, 1, 1, attention_vec_size])

    if prev_coverage is not None:  # for beam search mode with coverage
      # reshape from (batch_size, seq_len) to (batch_size, attn_len, 1, 1)
      prev_coverage = tf.expand_dims(tf.expand_dims(prev_coverage, 2), 3)

    def attention(decoder_state,
                  coverage=None,
                  num_words_section=None,
                  step=None):
      """Calculate the context vector and attention distribution from the decoder state.

      Args:
        decoder_state: state of the decoder
        coverage: Optional. Previous timestep's coverage vector, shape
          (batch_size, attn_len, 1, 1).
        num_words_section: number of words in each section (only needed for
          hierarchical attention) [batch_size, num_sections] -- assumes
          number of sections in the batch is equal (TODO: check sanity)
        step: index of the current decoder step (needed for section
          attention). Currently unused.

      Returns:
        context_vector: weighted sum of encoder_states
        attn_dist: attention distribution
        coverage: new coverage vector. shape (batch_size, attn_len, 1, 1)
        attn_dist_sec: section attention distribution (only returned, as a
          fourth element, in hierarchical mode)
      """
      with variable_scope.variable_scope("Attention"):
        # Pass the decoder state through a linear layer (this is
        # W_s s_t + b_attn in the paper)
        # (W_s s_t) + b_att is decoder_features; s_t = decoder_state
        # shape (batch_size, attention_vec_size)
        decoder_features = linear(decoder_state, attention_vec_size, True)
        # reshape to (batch_size, 1, 1, attention_vec_size)
        decoder_features = tf.expand_dims(
            tf.expand_dims(decoder_features, 1), 1)

        def masked_attention(e, enc_padding_mask):
          """Take softmax of e then apply enc_padding_mask and re-normalize"""
          if enc_section_padding_mask is not None:
            # NOTE(review): the cast is assumed to belong to this branch (the
            # original nesting is not recoverable) -- confirm.
            enc_padding_mask = tf.reshape(enc_section_padding_mask,
                                          [batch_size, -1])
            enc_padding_mask = tf.cast(enc_padding_mask, tf.float32)
          # take softmax. shape (batch_size, attn_length)
          attn_dist = nn_ops.softmax(e)
          attn_dist *= enc_padding_mask  # apply mask
          masked_sums = tf.reduce_sum(attn_dist, axis=1)  # shape (batch_size)
          return attn_dist / tf.reshape(masked_sums, [-1, 1])  # re-normalize

        def section_attention():
          """Softmax attention over sections, [batch_size x seq_len_sections]."""
          with tf.variable_scope("attention_sections"):
            if FLAGS.fixed_attn:
              # Separate projection of the decoder state for section scores.
              # shape (batch_size, attention_vec_size)
              section_features = linear(decoder_state,
                                        attention_vec_size,
                                        True,
                                        scope='Linear--Section-Features')
              # reshape to (batch_size, 1, 1, attention_vec_size)
              section_features = tf.expand_dims(
                  tf.expand_dims(section_features, 1), 1)
            else:
              section_features = decoder_features
            e_sec = math_ops.reduce_sum(
                v_sec *
                math_ops.tanh(encoder_section_features + section_features),
                [2, 3])  # [batch_size x seq_len_sections]
            return nn_ops.softmax(e_sec)

        def reweight_by_sections(e, attn_dist_sec):
          """Multiply word scores by the attention weight of their section."""
          e = tf.reshape(e, [batch_size, -1, num_words_section[0][0]])
          e = tf.multiply(e, attn_dist_sec[:, :, tf.newaxis])
          e = tf.reshape(e, [batch_size, -1])
          # --- Some hack for reweighting attention (similar to temp for
          # softmax). temperature defaults to None, which must not be
          # compared against a float.
          if temperature is not None and temperature > 0.0:
            e = e * temperature
          return e

        if use_coverage and coverage is not None:
          # non-first step of coverage
          if not hier:
            # Multiply coverage vector by w_c to get coverage_features.
            # c has shape (batch_size, seq_len, 1, attention_vec_size)
            coverage_features = nn_ops.conv2d(coverage, w_c, [1, 1, 1, 1],
                                              "SAME")
            # Calculate v^T tanh(W_h h_i + W_s s_t + w_c c_i^t + b_attn)
            e = math_ops.reduce_sum(
                v * math_ops.tanh(encoder_features + decoder_features +
                                  coverage_features),
                [2, 3])  # shape (batch_size,seq_len)
            # Take softmax of e to get the attention distribution
            attn_dist = masked_attention(e, enc_padding_mask)
            # Update coverage vector; shape=(batch_size, seq_len,1,1)
            coverage += array_ops.reshape(attn_dist, [batch_size, -1, 1, 1])
          else:
            if FLAGS.fixed_attn:
              tf.logging.debug('running with fixed attn')
            attn_dist_sec = section_attention()
            with tf.variable_scope("attention_words"):
              # c has shape (batch_size, seq_len, 1, attention_vec_size)
              coverage_features = nn_ops.conv2d(coverage, w_c, [1, 1, 1, 1],
                                                "SAME")
              # Calculate v^T tanh(W_h h_i + W_s s_t + w_c c_i^t + b_attn)
              e = math_ops.reduce_sum(
                  v * math_ops.tanh(encoder_features + decoder_features +
                                    coverage_features),
                  [2, 3])  # shape (batch_size,seq_len)
              # Multiply by section weights
              e = reweight_by_sections(e, attn_dist_sec)
              attn_dist = masked_attention(e, enc_padding_mask)
              # shape=(batch_size, seq_len,1,1)
              coverage += array_ops.reshape(attn_dist,
                                            [batch_size, -1, 1, 1])
        else:
          # Calculate v^T tanh(W_h h_i + W_s s_t + b_attn)
          if hier:
            attn_dist_sec = section_attention()
            with tf.variable_scope("attention_words"):
              e = math_ops.reduce_sum(
                  v * math_ops.tanh(encoder_features + decoder_features),
                  [2, 3])  # [batch_size x seq_len]
              e = reweight_by_sections(e, attn_dist_sec)
              attn_dist = masked_attention(e, enc_padding_mask)
          else:
            e = math_ops.reduce_sum(
                v * math_ops.tanh(encoder_features + decoder_features),
                [2, 3])  # calculate e
            # Take softmax of e to get the attention distribution
            if enc_padding_mask is not None:
              attn_dist = masked_attention(e, enc_padding_mask)
            else:
              attn_dist = nn_ops.softmax(e)  # shape (batch_size, seq_len)

          if use_coverage:  # first step of training
            # initialize coverage
            coverage = tf.expand_dims(tf.expand_dims(attn_dist, 2), 2)
            # TODO: coverage for hier

        # Calculate the context vector from attn_dist and encoder_states
        # encoder_states = [batch, seq_len, 1, encoder_output_size],
        # attn_dist = [batch, seq_len, 1, 1]
        context_vector = math_ops.reduce_sum(
            array_ops.reshape(attn_dist, [batch_size, -1, 1, 1]) *
            encoder_states, [1, 2])  # shape (batch_size, enc_output_size).
        context_vector = array_ops.reshape(context_vector,
                                           [-1, enc_output_size])

      if hier:
        return context_vector, attn_dist, coverage, attn_dist_sec
      else:
        return context_vector, attn_dist, coverage

    outputs = []
    attn_dists = []
    attn_dists_sec_list = []
    p_gens = []
    state = initial_state
    # initialize coverage to None or whatever was passed in
    coverage = prev_coverage
    context_vector = array_ops.zeros([batch_size, enc_output_size])
    # Ensure the second shape of attention vectors is set.
    context_vector.set_shape([None, enc_output_size])
    if initial_state_attention:  # true in decode mode
      # Re-calculate the context vector from the previous step so that we can
      # pass it through a linear layer with this step's input to get a
      # modified version of the input.
      # In decode mode, this is what updates the coverage vector.
      if hier:
        context_vector, attn_dist, coverage, attn_dist_sec = attention(
            initial_state, coverage, num_words_section)
      else:
        context_vector, _, coverage = attention(initial_state, coverage)

    for i, inp in enumerate(decoder_inputs):
      print("Adding attention_decoder timesteps. %i done of %i" %
            (i + 1, len(decoder_inputs)),
            end='\r')
      if i > 0:
        variable_scope.get_variable_scope().reuse_variables()

      # Merge input and previous attentions into one vector x of the same
      # size as inp. inp is [batch_size, input_size]
      input_size = inp.get_shape().with_rank(2)[1]
      if input_size.value is None:
        raise ValueError("Could not infer input size from input: %s" %
                         inp.name)
      x = linear([inp] + [context_vector], input_size, True)

      # Run the decoder RNN cell. cell_output = decoder state
      cell_output, state = cell(x, state)

      # Run the attention mechanism.
      if i == 0 and initial_state_attention:  # always true in decode mode
        # you need this because you've already run the initial
        # attention(...) call
        with variable_scope.variable_scope(
            variable_scope.get_variable_scope(), reuse=True):
          if hier:
            # NOTE(review): unlike the non-hierarchical branch below, this
            # call does let coverage update a second time -- confirm intent.
            context_vector, attn_dist, coverage, attn_dist_sec = attention(
                state, coverage, num_words_section)
          else:
            # don't allow coverage to update
            context_vector, attn_dist, _ = attention(state, coverage)
      else:
        if hier:
          context_vector, attn_dist, coverage, attn_dist_sec = attention(
              state, coverage, num_words_section)
        else:
          context_vector, attn_dist, coverage = attention(state, coverage)

      attn_dists.append(attn_dist)
      if hier:
        attn_dists_sec_list.append(attn_dist_sec)

      # Calculate p_gen
      if pointer_gen:
        with tf.variable_scope('calculate_pgen'):
          # a scalar
          p_gen = linear([context_vector, state.c, state.h, x], 1, True)
          p_gen = tf.sigmoid(p_gen)
          p_gens.append(p_gen)

      # Concatenate the cell_output (= decoder state) and the context vector,
      # and pass them through a linear layer
      # This is V[s_t, h*_t] + b in the paper
      with variable_scope.variable_scope("AttnOutputProjection"):
        output = linear([cell_output] + [context_vector], cell.output_size,
                        True)
      outputs.append(output)

    # If using coverage, reshape it
    if coverage is not None:
      coverage = array_ops.reshape(coverage, [batch_size, -1])

    return outputs, state, attn_dists, p_gens, coverage, attn_dists_sec_list
def sigmoid(self, x, name=None):
    """Apply ``tf.sigmoid`` to ``x``, forwarding the optional op ``name``."""
    activated = tf.sigmoid(x, name=name)
    return activated
def pass_through_sigmoid(x, slope=1):
    """Sigmoid whose gradient is overridden to be the identity.

    The forward pass is a plain ``tf.sigmoid``; the op is created while the
    graph's gradient for "Sigmoid" is mapped to "Identity".

    NOTE: ``slope`` is accepted but not used by this function.
    """
    graph = tf.get_default_graph()
    gradient_override = {"Sigmoid": "Identity"}
    with ops.name_scope("PassThroughSigmoid") as scope_name, \
            graph.gradient_override_map(gradient_override):
        return tf.sigmoid(x, name=scope_name)
# Second convolution block: bias, ReLU(conv + bias), then 3x3 max pooling.
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_3x3(h_conv2)
# Fully connected layer.
# Add a fully connected layer with 512 neurons to process the whole image.
# NOTE(review): the code below actually creates 20 units, not 512.
W_fc1 = weight_variable([3 * 3 * 64, 20])
b_fc1 = bias_variable([20])
h_pool2_flat = tf.reshape(h_pool2, [-1, 3 * 3 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
# To reduce overfitting, apply dropout before the output layer.
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
# Output layer: a single sigmoid unit.
W_fc2 = weight_variable([20, 1])
b_fc2 = bias_variable([1])
y_conv = tf.sigmoid(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
# Loss function.
# NOTE(review): only the y_ * log(p) term is present; for a single sigmoid
# output the (1 - y_) * log(1 - p) term looks missing -- confirm against the
# shape/encoding of y_, which is defined outside this section.
cross_entropy = -tf.reduce_sum(y_ * tf.log(y_conv))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
#cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv), reduction_indices=[1]))
#train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
# NOTE(review): y_conv has a single column (W_fc2 is [20, 1]), so
# tf.argmax(y_conv, 1) is always 0 -- verify this accuracy is meaningful.
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Initialize variables.
sess.run(tf.global_variables_initializer())
# The bare string below reads "split the training data".
''' 分割训练数据 '''
def split_data(df_vec):
# Load a frozen graph from ./networks/<net_name>/minimal_graph_quant.pb into
# the default graph and build the output tensors derived from its activations.
net_name = 'squeeze_normal-drone-dev'
folder_name = './networks/%s' % net_name
with gfile.FastGFile(folder_name + "/minimal_graph_quant.pb", 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
    # name='' imports the nodes without a name prefix, so the tensor names
    # looked up below match the ones stored in the file.
    tf.import_graph_def(graph_def, name='')
sq_graph = tf.get_default_graph()
inp_batch = sq_graph.get_tensor_by_name('Input_batching/batch:0')
t_activations = sq_graph.get_tensor_by_name('activation/activations:0')
print(inp_batch)
k = p.ANCHOR_COUNT
# Split the last axis of the activations into three consecutive groups:
# the first 4*k channels, the next k channels (passed through a sigmoid),
# and the following OUT_CLASSES*k channels.
t_deltas = tf.slice(t_activations, [0, 0, 0, 0], [-1, -1, -1, 4 * k])
t_gammas = tf.sigmoid(
    tf.slice(t_activations, [0, 0, 0, 4 * k], [-1, -1, -1, k]))
t_classes = tf.slice(t_activations, [0, 0, 0, 5 * k],
                     [-1, -1, -1, p.OUT_CLASSES * k])
# Index of the largest gamma along the last axis.
t_chosen_anchor = tf.argmax(t_gammas, axis=3)
all_out = [t_deltas, t_gammas, t_classes, t_chosen_anchor]
sess = tf.Session()
batch_size = 1
print('loading image.. ', end='')
def read_resize(pic):
    """Read the image at ``pic`` and resize it to 256x256."""
    return misc.imresize(misc.imread(pic), (256, 256))
def cw(model, x, y=None, eps=1.0, ord_=2, T=2, optimizer=None, alpha=0.9,
       min_prob=0, clip=(0.0, 1.0)):
    """Build the graph for a Carlini-Wagner (CW) attack.

    Only CW-L2 and CW-Linf are implemented.  See
    https://arxiv.org/abs/1608.04644 for details.

    The attack minimizes a loss with two parts: a) a distance term between
    the clean and the adversarial sample, and b) a term that pushes the
    model towards classifying the adversarial sample as the target label.

    CW is an optimization process with many hyper-parameters to tune.  The
    binary search for the best ``eps`` is not performed here; callers may run
    it (or a grid search) around this function.

    :param model: The model wrapper.  It is called as
        ``model(xadv, logits=True)`` and must return ``(ybar, logits)``.
    :param x: The clean input, usually a placeholder.  Its shape MUST be
        static (fixed at graph-construction time) because the noise variable
        is created with that shape.
    :param y: The target label (scalar or one label per sample).  When None,
        the least-likely label according to the model is used.
    :param eps: The scaling factor for the classification term.
    :param ord_: The p-norm, 2 or inf.  Only tested for "is 2" versus
        "is not 2".
    :param T: The temperature of the sigmoid used to map between input space
        and the unconstrained space.  The original paper uses
        (tanh(x)+1)/2 = sigmoid(2x), i.e. T=2.
    :param optimizer: The optimizer used to minimize the CW loss.  Defaults
        to ``tf.train.AdamOptimizer(learning_rate=0.1)``, created per call.
        Note the learning rate is much larger than usual.
    :param alpha: Used only in CW-Linf.  The factor by which the per-sample
        noise upper bound ``tau`` is shrunk.
    :param min_prob: The margin added to the classification term.  Generally
        a larger min_prob will result in more noise.
    :param clip: A tuple (clip_min, clip_max) giving the value range of x.

    :return: A tuple (train_op, xadv, noise).  Run train_op for some epochs
        to generate the adversarial sample, then evaluate xadv.  ``noise``
        lives in sigmoid-space rather than input space; it is returned so
        that it can be reset before each batched attack.
    """
    # Build the default optimizer per call instead of once at definition
    # time, so separate attacks do not share one optimizer instance.
    if optimizer is None:
        optimizer = tf.train.AdamOptimizer(learning_rate=0.1)

    clip_min, clip_max = clip

    xshape = x.get_shape().as_list()
    noise = tf.get_variable('noise', xshape, tf.float32,
                            initializer=tf.initializers.zeros)

    # scale input to (0, 1)
    x_scaled = (x - clip_min) / (clip_max - clip_min)

    # change to sigmoid-space, clip to avoid overflow.
    z = tf.clip_by_value(x_scaled, 1e-8, 1 - 1e-8)
    xinv = tf.log(z / (1 - z)) / T

    # add noise in sigmoid-space and map back to input domain
    xadv = tf.sigmoid(T * (xinv + noise))
    xadv = xadv * (clip_max - clip_min) + clip_min

    ybar, logits = model(xadv, logits=True)
    ydim = ybar.get_shape().as_list()[1]

    if y is not None:
        # Broadcast a scalar label to one label per sample.
        y = tf.cond(tf.equal(tf.rank(y), 0),
                    lambda: tf.fill([xshape[0]], y),
                    lambda: tf.identity(y))
    else:
        # we set target to the least-likely label
        y = tf.argmin(ybar, axis=1, output_type=tf.int32)

    # Subtracting +inf everywhere except at the target leaves only the
    # target logit for reduce_max to pick up.
    mask = tf.one_hot(y, ydim, on_value=0.0, off_value=float('inf'))
    yt = tf.reduce_max(logits - mask, axis=1)   # target logit
    yo = tf.reduce_max(logits, axis=1)          # largest logit overall

    # encourage classification as the target label
    loss0 = tf.nn.relu(yo - yt + min_prob)

    axis = list(range(1, len(xshape)))

    # make sure the adversarial images are visually close
    tau_update = None
    if float(ord_) == 2:
        # CW-L2.  The original paper sums the squared differences per sample;
        # the mean over all elements is used here instead.
        loss1 = tf.reduce_mean(tf.square(xadv - x))
    else:
        # CW-Linf: penalize only the part of |xadv - x| above a per-sample
        # upper bound tau.
        tau_shape = [xshape[0]] + [1] * len(axis)
        tau0 = tf.fill(tau_shape, float(clip_max))
        tau = tf.get_variable('cw8-noise-upperbound', dtype=tf.float32,
                              initializer=tau0, trainable=False)
        excess = tf.abs(xadv - x) - tau
        # relu is applied per element so that slack on some pixels cannot
        # cancel the excess on others.
        loss1 = tf.reduce_sum(tf.nn.relu(excess), axis=axis)

        # If every value of a sample is below its bound, shrink that bound
        # by alpha so the L-inf attack does not get stuck.  The bound has to
        # be written back with tf.assign, otherwise it never changes.
        all_below = tf.reshape(tf.reduce_all(excess < 0, axis=axis),
                               tau_shape)
        tau_update = tf.assign(tau, tf.where(all_below, alpha * tau, tau))

    loss = eps * loss0 + loss1
    train_op = optimizer.minimize(loss, var_list=[noise])

    # tau is updated together with each optimization step.  Refer to the
    # CW-Linf section in the original paper.
    if tau_update is not None:
        train_op = tf.group(train_op, tau_update)

    return train_op, xadv, noise
cs = [0] * T # sequence of canvases mus, logsigmas, sigmas = [0] * T, [0] * T, [ 0 ] * T # gaussian params generated by SampleQ. We will need these for computing loss. # initial states h_dec_prev = tf.zeros((batch_size, dec_size)) enc_state = lstm_enc.zero_state(batch_size, tf.float32) dec_state = lstm_dec.zero_state(batch_size, tf.float32) ## DRAW MODEL ## # construct the unrolled computational graph for t in range(T): c_prev = tf.zeros((batch_size, img_size)) if t == 0 else cs[t - 1] x_hat = x - tf.sigmoid(c_prev) # error image r = read(x, x_hat, h_dec_prev) h_enc, enc_state = encode(enc_state, tf.concat([r, h_dec_prev], 1)) z, mus[t], logsigmas[t], sigmas[t] = sampleQ(h_enc) h_dec, dec_state = decode(dec_state, z) cs[t] = c_prev + write(h_dec) # store results h_dec_prev = h_dec DO_SHARE = True # from now on, share variables ## LOSS FUNCTION ## def binary_crossentropy(t, o): return -(t * tf.log(o + eps) + (1.0 - t) * tf.log(1.0 - o + eps))
def gated_conv2d(X,
                 K_h,
                 K_w,
                 K_c,
                 strides=[1, 1, 1, 1],
                 padding='SAME',
                 mask=None,
                 cond_h=None,
                 vertical_h=None):
    """Apply a (optionally masked) gated convolution to `X`.

    A single convolution produces ``2 * K_c`` channels.  The first ``K_c``
    channels go through ``tanh``, the last ``K_c`` through ``sigmoid``, and
    the two halves are multiplied element-wise.

    Parameters
    ----------
    X : tf.Tensor
        Rank-4 input whose last (channel) dimension is statically known.
    K_h : int
        Kernel height.
    K_w : int
        Kernel width.
    K_c : int
        Number of output channels of the gated activation.
    strides : list, optional
        Strides passed to ``tf.nn.conv2d``.
    padding : str, optional
        Padding mode passed to ``tf.nn.conv2d``.
    mask : None, optional
        If given, multiplied element-wise with the kernel before the
        convolution.
    cond_h : None, optional
        If given, a rank-2 tensor ``[batch, cond_dim]`` that is linearly
        projected to ``2 * K_c`` values per sample and added to every
        spatial position of the pre-activation.
    vertical_h : None, optional
        If given, a tensor with ``2 * K_c`` channels that is passed through
        a 1x1 convolution and added to the pre-activation.

    Returns
    -------
    tuple
        ``(y, h)`` where ``y`` is the gated output with ``K_c`` channels and
        ``h`` is the pre-activation with ``2 * K_c`` channels.
    """
    with tf.variable_scope('masked_cnn'):
        W = tf.get_variable(
            name='W',
            shape=[K_h, K_w, X.shape[-1].value, K_c * 2],
            initializer=tf.contrib.layers.xavier_initializer_conv2d())
        b = tf.get_variable(name='b',
                            shape=[K_c * 2],
                            initializer=tf.zeros_initializer())
        if mask is not None:
            W = tf.multiply(mask, W)

        # Initial convolution with masked kernel
        h = tf.nn.bias_add(
            tf.nn.conv2d(X, W, strides=strides, padding=padding), b)

    # Combine the vertical stack's pre-activations with our hidden embedding
    # before applying the split nonlinearities.
    if vertical_h is not None:
        with tf.variable_scope('vtoh'):
            W_vtoh = tf.get_variable(
                name='W',
                shape=[1, 1, K_c * 2, K_c * 2],
                initializer=tf.contrib.layers.xavier_initializer_conv2d())
            b_vtoh = tf.get_variable(name='b',
                                     shape=[K_c * 2],
                                     initializer=tf.zeros_initializer())
            h = tf.add(
                h,
                tf.nn.bias_add(
                    tf.nn.conv2d(vertical_h,
                                 W_vtoh,
                                 strides=strides,
                                 padding=padding), b_vtoh))

    # Condition on some given data
    if cond_h is not None:
        with tf.variable_scope('conditioning'):
            # The projection must supply both halves (tanh and sigmoid) of
            # the pre-activation, hence 2 * K_c outputs.
            V = tf.get_variable(
                name='V',
                shape=[cond_h.shape[1].value, K_c * 2],
                initializer=tf.contrib.layers.xavier_initializer_conv2d())
            b_cond = tf.get_variable(name='b',
                                     shape=[K_c * 2],
                                     initializer=tf.zeros_initializer())
            cond_proj = tf.nn.bias_add(tf.matmul(cond_h, V), b_cond)
            # Reshape to [batch, 1, 1, 2*K_c] so the per-sample bias is
            # broadcast over all spatial positions.  (Adding a Python list
            # to tf.shape(...) is element-wise addition, not concatenation,
            # so it cannot be used to build the target shape.)
            h = tf.add(h,
                       tf.reshape(cond_proj, [-1, 1, 1, K_c * 2]),
                       name='h')

    with tf.variable_scope('gated_cnn'):
        # Finally slice and apply gated multiplier
        h_f = tf.slice(h, [0, 0, 0, 0], [-1, -1, -1, K_c])
        h_g = tf.slice(h, [0, 0, 0, K_c], [-1, -1, -1, K_c])
        y = tf.multiply(tf.nn.tanh(h_f), tf.sigmoid(h_g))

    return y, h
# TODO : 1. Create weights and biases of encoder's second layer. (5%) # Hint : use n_hidden_2 encoder_w2 = tf.Variable(tf.truncated_normal([n_hidden_1, n_hidden_2])) encoder_b2 = tf.Variable(tf.zeros([n_hidden_2])) # TODO : 2. Create weights and biases of encoder's *second* layer. (5%) # Hint : pay attention to the symmetry between layers decoder_w2 = tf.Variable(tf.truncated_normal([n_hidden_2, n_hidden_1])) decoder_b2 = tf.Variable(tf.zeros([n_hidden_1])) # Weights and biases of decoder's *first* layer. decoder_w1 = tf.Variable(tf.truncated_normal([n_hidden_1, NUM_FEATURES])) decoder_b1 = tf.Variable(tf.zeros([NUM_FEATURES])) # Training computation. encoder_l1 = tf.sigmoid( tf.matmul(tf_train_features, encoder_w1) + encoder_b1) # TODO : 3. Write the computation of encoder's second layer and decoder's *second* layer. (5%) # Hint : similar to encoder_l1 and decoder_l1 encoder_l2 = tf.sigmoid(tf.matmul(encoder_l1, encoder_w2) + encoder_b2) decoder_l2 = tf.sigmoid(tf.matmul(encoder_l2, decoder_w2) + decoder_b2) decoder_l1 = tf.sigmoid(tf.matmul(decoder_l2, decoder_w1) + decoder_b1) # TODO : 4. Define the loss function. (5%) # Hint : use tf.losses.mean_squared_error() loss = tf.losses.mean_squared_error(tf_train_features, decoder_l1) # TODO : 5. Define a gradient descent optimizer. (5%) # Hint : user tf.train.GradientDescentOptimizer(...).minimize(...) optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss) # Training process.
def pixel_flow(x, offset, interpolation='bilinear', name='pixel_flow'):
    """pixel_flow: an operation to reorder pixels according to offsets.

    Every output pixel is a weighted mix of the four input pixels around
    ``position + offset``; sampling positions are clipped to the image
    bounds.

    Args:
        x (tf.Tensor): NHWC
        offset (tf.Tensor): NHW2, 2 indicates (h, w) coordinates offset
        interpolation: 'bilinear' uses the fractional part of the offset as
            weights; 'softmax' first sharpens those weights with
            ``sigmoid(10 * (w - 0.5))``.
        name: name of module

    Returns:
        tf.Tensor: the resampled tensor, NHWC like ``x``.

    Raises:
        NotImplementedError: if ``interpolation`` is neither 'bilinear' nor
            'softmax'.

    References
    ----------
    [1] Spatial Transformer Networks: https://arxiv.org/abs/1506.02025
    [2] https://github.com/ppwwyyxx/tensorpack
    """
    # Validate before building any graph ops.  (An `assert` on an exception
    # instance always passes, so unknown modes used to fall through silently
    # to bilinear weights.)
    if interpolation not in ('bilinear', 'softmax'):
        raise NotImplementedError(
            "interpolation method: {} is not implemented.".format(
                interpolation))

    def absolute_coords(x, offset):
        """Return (shape of x, [N, H, W, 3] clipped (n, h, w) sample indices)."""
        offset = tf.cast(offset, tf.int32)
        xs = tf.shape(input=x)
        n_add = tf.tile(tf.reshape(tf.range(xs[0]), [xs[0], 1, 1, 1]),
                        [1, xs[1], xs[2], 1])
        h_add = tf.tile(tf.reshape(tf.range(xs[1]), [1, xs[1], 1, 1]),
                        [xs[0], 1, xs[2], 1])
        w_add = tf.tile(tf.reshape(tf.range(xs[2]), [1, 1, xs[2], 1]),
                        [xs[0], xs[1], 1, 1])
        coords = offset + tf.concat([h_add, w_add], axis=3)
        # Keep the sampling positions inside the image.
        coords = tf.clip_by_value(coords, 0, [xs[1] - 1, xs[2] - 1])
        return xs, tf.concat([n_add, coords], axis=3)

    def reindex(x, offset):
        """Gather x at position + offset using gather_nd."""
        _, coords = absolute_coords(x, offset)
        # TODO(Jiahui): gather nd is also too slow.
        return tf.gather_nd(x, coords)

    def reindex_slow(x, offset):
        """Same result as reindex, via a flattened tf.gather (kept as an alternative)."""
        xs, coords = absolute_coords(x, offset)
        flat_x = tf.reshape(x, [-1, xs[3]])
        coords_flat = tf.reshape(coords, [-1, 3])
        # Row-major flat index of (n, h, w) in an [N, H, W] grid:
        # n * H * W + h * W + w.
        flat_index = (coords_flat[:, 0] * xs[1] * xs[2] +
                      coords_flat[:, 1] * xs[2] + coords_flat[:, 2])
        sampled = tf.gather(flat_x, flat_index)
        return tf.reshape(sampled, xs)

    with tf.compat.v1.variable_scope(name):
        assert x.get_shape().ndims == 4 and offset.get_shape().ndims == 4
        l = tf.floor(offset)  # lower
        u = l + 1  # upper
        diff = offset - l
        neg_diff = 1.0 - diff
        lh, lw = tf.split(l, 2, axis=3)
        uh, uw = tf.split(u, 2, axis=3)
        lhuw = tf.concat([lh, uw], axis=3)
        uhlw = tf.concat([uh, lw], axis=3)
        diffh, diffw = tf.split(diff, 2, axis=3)
        neg_diffh, neg_diffw = tf.split(neg_diff, 2, axis=3)
        if interpolation == 'softmax':
            # Push the bilinear weights towards 0/1.
            scale = 10.
            diffh = tf.sigmoid(scale * (diffh - 0.5))
            diffw = tf.sigmoid(scale * (diffw - 0.5))
            neg_diffh = tf.sigmoid(scale * (neg_diffh - 0.5))
            neg_diffw = tf.sigmoid(scale * (neg_diffw - 0.5))
        # Each corner is weighted by the closeness of the sample point to it.
        sampled = tf.add_n([
            reindex(x, l) * neg_diffw * neg_diffh,
            reindex(x, u) * diffw * diffh,
            reindex(x, lhuw) * neg_diffh * diffw,
            reindex(x, uhlw) * diffh * neg_diffw
        ],
                           name='sampled')
        return sampled
def model_fn(features, labels, mode, params):
    """Build Model function f(x) for Estimator.

    The score is ``f(x) = bias + sum(w * x) + MLP(BI(embeddings))`` and the
    predicted probability is ``sigmoid(f(x))``.

    Args:
        features: dict with 'feat_ids' and 'feat_vals'; both are reshaped to
            [-1, field_size].
        labels: targets passed to the sigmoid cross-entropy loss and to the
            AUC metric.
        mode: a tf.estimator.ModeKeys value.
        params: dict with the keys 'field_size', 'feature_size',
            'embedding_size', 'l2_reg', 'learning_rate', 'dropout' (keep
            probabilities: one per MLP layer, the last entry is used for the
            bi-interaction output) and 'layers' (MLP layer sizes).

    Returns:
        A tf.estimator.EstimatorSpec for the requested mode.
    """
    #------hyper parameters------
    field_size = params['field_size']
    feature_size = params['feature_size']
    embedding_size = params['embedding_size']
    l2_reg = params['l2_reg']
    learning_rate = params['learning_rate']
    dropout = params['dropout']
    layers = params['layers']

    is_training = mode == tf.estimator.ModeKeys.TRAIN

    #------build weights------
    Global_Bias = tf.get_variable(name='bias',
                                  shape=[1],
                                  initializer=tf.constant_initializer(0.0))
    Feat_Wgts = tf.get_variable(name='linear',
                                shape=[feature_size],
                                initializer=tf.glorot_normal_initializer())
    Feat_Emb = tf.get_variable(name='emb',
                               shape=[feature_size, embedding_size],
                               initializer=tf.glorot_normal_initializer())

    #------build feature------
    feat_ids = tf.reshape(features['feat_ids'], shape=[-1, field_size])
    feat_vals = tf.reshape(features['feat_vals'], shape=[-1, field_size])

    #------build f(x)------
    # f(x) = bias + sum(wx) + MLP(BI(embed_vec))

    # FM part
    with tf.variable_scope("Linear-part"):
        feat_wgts = tf.nn.embedding_lookup(Feat_Wgts, feat_ids)  # None * F
        y_linear = tf.reduce_sum(tf.multiply(feat_wgts, feat_vals), 1)  # None

    with tf.variable_scope("BiInter-part"):
        embeddings = tf.nn.embedding_lookup(Feat_Emb, feat_ids)  # None * F * k
        feat_vals = tf.reshape(feat_vals,
                               shape=[-1, field_size, 1])  # None * F * 1
        embeddings = tf.multiply(embeddings, feat_vals)  # vi * xi
        # 0.5 * ((sum v)^2 - sum(v^2)) equals the sum of pairwise products.
        sum_square_emb = tf.square(tf.reduce_sum(embeddings, 1))
        square_sum_emb = tf.reduce_sum(tf.square(embeddings), 1)
        deep_inputs = 0.5 * tf.subtract(sum_square_emb,
                                        square_sum_emb)  # None * k

    with tf.variable_scope("Deep-part"):
        # The bi-interaction output goes through batch normalization ...
        deep_inputs = batch_norm_layer(deep_inputs,
                                       train_phase=is_training,
                                       scope_bn="bn_after_bi")
        # ... and then dropout (training only).
        if is_training:
            deep_inputs = tf.nn.dropout(
                deep_inputs,
                keep_prob=dropout[-1])  # dropout at bilinear interaction layer

        for i, num_units in enumerate(layers):
            deep_inputs = tf.contrib.layers.fully_connected(
                inputs=deep_inputs,
                num_outputs=num_units,
                weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg),
                scope="mlp%d" % i)

            # Note the order: batch normalization first, then dropout.
            deep_inputs = batch_norm_layer(deep_inputs,
                                           train_phase=is_training,
                                           scope_bn="bn%d" % i)
            if is_training:
                deep_inputs = tf.nn.dropout(deep_inputs, keep_prob=dropout[i])

        # Output
        y_deep = tf.contrib.layers.fully_connected(
            inputs=deep_inputs,
            num_outputs=1,
            activation_fn=tf.identity,
            weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg),
            scope="deep_out")
        y_d = tf.reshape(y_deep, shape=[-1])

    with tf.variable_scope("NFM-out"):
        y_bias = Global_Bias * tf.ones_like(y_d, dtype=tf.float32)
        y = y_bias + y_linear + y_d
        pred = tf.sigmoid(y)

    predictions = {"prob": pred}
    export_outputs = {
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
        tf.estimator.export.PredictOutput(predictions)
    }
    # Provide an estimator spec for `ModeKeys.PREDICT`
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions,
                                          export_outputs=export_outputs)

    #------build loss------
    # The `weights_regularizer` terms of the fully connected layers are only
    # collected in tf.GraphKeys.REGULARIZATION_LOSSES; a custom model_fn has
    # to add them to the loss itself, otherwise they have no effect.
    loss = (tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=y, labels=labels)) +
            l2_reg * tf.nn.l2_loss(Feat_Wgts) +
            l2_reg * tf.nn.l2_loss(Feat_Emb) +
            tf.losses.get_regularization_loss())

    # Provide an estimator spec for `ModeKeys.EVAL`
    if mode == tf.estimator.ModeKeys.EVAL:
        eval_metric_ops = {"auc": tf.metrics.auc(labels, pred)}
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions,
                                          loss=loss,
                                          eval_metric_ops=eval_metric_ops)

    #------build optimizer------
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                       beta1=0.9,
                                       beta2=0.999,
                                       epsilon=1e-8)
    # NOTE(review): if batch_norm_layer registers its moving-average updates
    # in tf.GraphKeys.UPDATE_OPS, train_op must depend on them -- confirm
    # against the helper's implementation.
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())

    # Provide an estimator spec for `ModeKeys.TRAIN` modes
    if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions,
                                          loss=loss,
                                          train_op=train_op)
# Train with Adam and, after every epoch, dump one image per batch slot.
optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
inference.initialize(optimizer=optimizer)

init = tf.initialize_all_variables()
init.run()

n_epoch = 100
n_iter_per_epoch = 1000

# Build the sigmoid op once.  Creating it inside the epoch loop would add a
# new node to the graph on every epoch.
imgs_op = tf.sigmoid(logits)

for epoch in range(n_epoch):
    avg_loss = 0.0

    widgets = ["epoch #%d|" % epoch, Percentage(), Bar(), ETA()]
    pbar = ProgressBar(n_iter_per_epoch, widgets=widgets)
    pbar.start()
    for t in range(n_iter_per_epoch):
        pbar.update(t)
        x_train, _ = mnist.train.next_batch(M)
        info_dict = inference.update(feed_dict={x_ph: x_train})
        avg_loss += info_dict['loss']
    # Complete the bar so the summary below starts on its own line.
    pbar.finish()

    # Print a lower bound to the average marginal likelihood for an
    # image.
    # NOTE(review): the sign convention of info_dict['loss'] is not visible
    # here -- confirm that the printed inequality matches it.
    avg_loss = avg_loss / n_iter_per_epoch
    avg_loss = avg_loss / M
    print("log p(x) >= {:0.3f}".format(avg_loss))

    # Visualize hidden representations: one 28x28 image per batch element,
    # overwriting the files of the previous epoch.
    imgs = imgs_op.eval()
    for m in range(M):
        imsave(os.path.join(IMG_DIR, '%d.png') % m, imgs[m].reshape(28, 28))
])

# The model outputs raw logits, so the loss applies the sigmoid itself.
loss_func = tf.keras.losses.BinaryCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam()

# Custom training loop: 10 passes over train_data.
for e in range(10):
    # Per-example correctness flags (0/1) accumulated over the epoch.
    accuracy = []
    # take(-1) takes every element of the dataset.
    for batch, (text, label) in enumerate(train_data.take(-1)):
        with tf.GradientTape() as tape:
            logits = model(text)
            # Add a trailing axis to the labels before comparing them with
            # the logits.
            label = tf.expand_dims(label, 1)
            loss = loss_func(label, logits)
        gradients = tape.gradient(loss, model.trainable_variables)
        grads_and_vars = zip(gradients, model.trainable_variables)
        optimizer.apply_gradients(grads_and_vars)

        # A sigmoid probability above 0.5 counts as a positive prediction.
        # NOTE(review): tf.equal requires matching dtypes, so `label` is
        # presumably int64 -- confirm against the dataset.
        predictions = tf.cast(tf.math.greater(tf.sigmoid(logits), 0.5),
                              tf.int64)
        accuracy.extend(
            tf.cast(tf.equal(predictions, label), tf.int64).numpy())

        # Report the current batch loss and the running epoch accuracy
        # every 100 batches.
        if batch % 100 == 0:
            print('\nEpoch: {} - Batch: {}'.format(e, batch))
            print('Loss: {:.4f}'.format(loss.numpy()))
            print('Accuracy: {}'.format(np.mean(accuracy)))

# Run the model on one batch of test data and pick 5 random example indices
# (np.random.choice samples with replacement by default).
for _, (text, label) in enumerate(test_data.take(1)):
    logits = model(text)
    random_id = np.random.choice(label.shape[0], 5)
    for ix in random_id:
        print('\n')