Example #1
0
        def encoder_body(time, old_state, output_ta_t):
            """Run one GRU-style encoder step for the enclosing loop.

            Reads the input for step `time` from `input_ta`, blends a candidate
            state with `old_state` through an update gate, and writes the
            freshly computed state to `output_ta_t` at index `time`.

            The state carried to the next step is the fresh state only where
            the step is still inside a sequence; elsewhere `old_state` is kept.
            presumably `lengths` holds per-example sequence lengths and
            `min_sequence_length` / `max_sequence_length` their bounds --
            verify against the enclosing function.

            Returns:
                The tuple `(time + 1, state, output_ta_t)`.
            """
            step_input = input_ta.read(time)

            gate_input = tf.concat(1, [step_input, old_state])
            update_gate = tf.sigmoid(tf.matmul(gate_input, W_z) + b_z)
            reset_gate = tf.sigmoid(tf.matmul(gate_input, W_r) + b_r)
            candidate_input = tf.concat(1, [step_input, reset_gate * old_state])
            candidate = tf.tanh(tf.matmul(candidate_input, W_h) + b_h)
            new_state = (1 - update_gate) * candidate + update_gate * old_state

            # The unmasked state is what gets recorded as this step's output.
            output_ta_t = output_ta_t.write(time, new_state)

            def take_new_everywhere():
                return new_state

            def take_new_where_active():
                # Per-element choice between the fresh and the previous state.
                if reverse:
                    active = tf.greater_equal(time, max_sequence_length - lengths)
                else:
                    active = tf.less(time, lengths)
                return tf.select(active, new_state, old_state)

            # While the step is covered by the shortest sequence no masking is
            # needed, so the cheaper branch is selected.
            if reverse:
                all_active = tf.greater_equal(
                    time, max_sequence_length - min_sequence_length)
            else:
                all_active = tf.less(time, min_sequence_length)
            state = tf.cond(all_active, take_new_everywhere, take_new_where_active)

            return (time + 1, state, output_ta_t)
 def lstm_cell(i, o, state):
     """Apply one LSTM step using the enclosing scope's gate parameters.

     Args:
         i: tensor multiplied with the `ix`/`fx`/`cx`/`ox` matrices.
         o: tensor multiplied with the `im`/`fm`/`cm`/`om` matrices
             (presumably the previous output -- verify against caller).
         state: previous cell state.

     Returns:
         `(output, new_state)`: the gated output and the updated cell state.
     """
     input_pre = tf.matmul(i, ix) + tf.matmul(o, im) + ib
     input_gate = tf.sigmoid(input_pre)
     forget_pre = tf.matmul(i, fx) + tf.matmul(o, fm) + fb
     forget_gate = tf.sigmoid(forget_pre)
     update = tf.matmul(i, cx) + tf.matmul(o, cm) + cb
     retained = forget_gate * state
     written = input_gate * tf.tanh(update)
     new_state = retained + written
     output_pre = tf.matmul(i, ox) + tf.matmul(o, om) + ob
     output_gate = tf.sigmoid(output_pre)
     return output_gate * tf.tanh(new_state), new_state
Example #3
0
 def LSTMCell(cls, x, mprev, cprev, weights):
   """Compute one LSTM step from a single fused weight matrix.

   `x` and `mprev` are concatenated along axis 1 and multiplied by `weights`;
   the product is cut into four equal slices along axis 1, used in this order:
   candidate input, input gate, forget gate, output gate. No bias is added.

   Returns:
     `(new_m, new_c)`: the new output and the new cell value, the latter
     clipped to [-50, 50].
   """
   joined = tf.concat(1, [x, mprev])
   projected = tf.matmul(joined, weights)
   cand, in_gate, forget_gate, out_gate = tf.split(1, 4, projected)
   retained = tf.sigmoid(forget_gate) * cprev
   added = tf.sigmoid(in_gate) * tf.tanh(cand)
   # Clip the cell value so repeated accumulation cannot blow up.
   cell = tf.clip_by_value(retained + added, -50.0, 50.0)
   memory = tf.sigmoid(out_gate) * tf.tanh(cell)
   return memory, cell
Example #4
0
    def __call__(self, inputs, state, scope=None):
        """Run one LSTM step with an optional output projection.

        `state` carries the cell value in its first `self._num_units` columns
        and the previous output in the columns after that.

        Args:
            inputs: rank-2 tensor whose second dimension must be statically known.
            state: concatenated `[c, m]` state from the previous step.
            scope: accepted but not used; the variable scope is always named
                after the class.

        Returns:
            `(m, new_state)` where `new_state` is `[c, m]` joined along axis 1.

        Raises:
            ValueError: if the second dimension of `inputs` is unknown.
        """
        if self._num_proj is None:
            proj_width = self._num_units
        else:
            proj_width = self._num_proj

        c_prev = tf.slice(state, [0, 0], [-1, self._num_units])
        m_prev = tf.slice(state, [0, self._num_units], [-1, proj_width])

        depth = inputs.get_shape().with_rank(2)[1]
        if depth.value is None:
            raise ValueError("Could not infer input size from inputs.get_shape()[-1]")

        with tf.variable_scope(type(self).__name__,
                               initializer=self._initializer):
            joined = tf.concat(1, [inputs, m_prev])
            pre_act = tf.nn.bias_add(tf.matmul(joined, self._concat_w), self._b)
            # Slice order: input gate, new input, forget gate, output gate.
            in_gate, new_input, forget_gate, out_gate = tf.split(1, 4, pre_act)

            retained = tf.sigmoid(forget_gate + self._forget_bias) * c_prev
            # NOTE(review): the new input is squashed with sigmoid here, not the
            # tanh usually seen in LSTM cells -- confirm this is intended.
            added = tf.sigmoid(in_gate) * tf.sigmoid(new_input)
            c = retained + added

            m = tf.sigmoid(out_gate) * tf.tanh(c)

            if self._num_proj is not None:
                m = tf.matmul(m, self._concat_w_proj)

        return m, tf.concat(1, [c, m])
Example #5
0
        def unit(x, hidden_memory_tm1):
            """Advance the LSTM by one step.

            Args:
                x: input tensor for this step.
                hidden_memory_tm1: packed pair `[hidden_state, cell]` from the
                    previous step (unpacked with `tf.unpack`).

            Returns:
                The new `[hidden_state, cell]` pair packed with `tf.pack`.
            """
            h_prev, c_prev = tf.unpack(hidden_memory_tm1)

            def affine_then(squash, w_in, w_rec, bias):
                # Shared shape of all four gates: squash(x.W + h_prev.U + b).
                return squash(tf.matmul(x, w_in) + tf.matmul(h_prev, w_rec) + bias)

            input_gate = affine_then(tf.sigmoid, self.Wi, self.Ui, self.bi)
            forget_gate = affine_then(tf.sigmoid, self.Wf, self.Uf, self.bf)
            output_gate = affine_then(tf.sigmoid, self.Wog, self.Uog, self.bog)
            candidate = affine_then(tf.nn.tanh, self.Wc, self.Uc, self.bc)

            # Keep part of the old cell and add the gated candidate.
            cell = forget_gate * c_prev + input_gate * candidate
            hidden = output_gate * tf.nn.tanh(cell)

            return tf.pack([hidden, cell])
Example #6
0
def train():
    """Train a two-layer sigmoid network on the XOR truth table.

    Uses the module-level parameters `w1`, `b1`, `w2`, `b2` and the
    module-level session `sess`. Runs 100000 gradient-descent steps (learning
    rate 0.01) on a mean binary cross-entropy cost, printing `(step, cost)`
    every 1000 steps and a final completion message.

    The print calls are written so the block parses on Python 3 while emitting
    exactly the same text as before on Python 2.
    """
    # Placeholders for the training inputs (4 samples with 2 features each)
    # and the targets (4 values, each 0 or 1).
    x = tf.placeholder(tf.float32, [4, 2], name='x-inputs')
    y = tf.placeholder(tf.float32, [4, 1], name='y-inputs')

    # Forward pass: one hidden sigmoid layer, one sigmoid output layer.
    temp = tf.sigmoid(tf.matmul(x, w1) + b1)
    output = tf.sigmoid(tf.matmul(temp, w2) + b2)

    # Cost is the cross-entropy averaged over the training samples.
    cost = tf.reduce_mean(((y * tf.log(output)) + ((1 - y) * tf.log(1.0 - output))) * -1)

    # Training step is plain gradient descent.
    train_step = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)

    # XOR truth table.
    training_x = [[0, 1], [0, 0], [1, 0], [1, 1]]
    training_y = [[1], [0], [1], [0]]
    feed = {x: training_x, y: training_y}

    # Initialise all variables in the shared session.
    init = tf.initialize_all_variables()
    sess.run(init)

    for i in range(100000):
        sess.run(train_step, feed_dict=feed)

        if i % 1000 == 0:
            # Printed as a single tuple to match the previous Python 2 output.
            print((i, sess.run(cost, feed_dict=feed)))

    print('\ntraining done\n')
Example #7
0
 def loss_fn(w_flat):
   """Mean squared reconstruction error of a tied-weight sigmoid autoencoder.

   `w_flat` is reshaped to `[visible_size, hidden_size]`; the same matrix is
   used to encode `data` and, transposed, to decode it again.
   """
   weights = tf.reshape(w_flat, [visible_size, hidden_size])
   hidden = tf.sigmoid(tf.matmul(data, weights))
   reconstruction = tf.sigmoid(tf.matmul(hidden, weights, transpose_b=True))
   return tf.reduce_mean(tf.square(reconstruction - data))
Example #8
0
    def __call__(self, inputs, state, scope=None):
        """Run one step of an LSTM variant with peepholes and no input gate.

        `state` is split in half along axis 1 into the previous cell value and
        the previous output. The input gate is the constant 1; the forget and
        output gates each add a peephole term (`c * p`).

        Returns:
            `(y, new_state)` where `new_state` is `[c, y]` joined along axis 1.
        """
        with tf.variable_scope(scope or type(self).__name__):
            uniform_init = tf.random_uniform_initializer(-0.1, 0.1)

            def make_var(name, shape):
                return tf.get_variable(name, shape, initializer=uniform_init, dtype=inputs.dtype)

            c_prev, y_prev = tf.split(1, 2, state)

            blocks = self._num_blocks
            # Variables are created in the same order as before:
            # input weights, recurrent weights, biases, peepholes.
            W_z, W_f, W_o = [make_var(n, [self.input_size, blocks])
                             for n in ("W_z", "W_f", "W_o")]
            R_z, R_f, R_o = [make_var(n, [blocks, blocks])
                             for n in ("R_z", "R_f", "R_o")]
            b_z, b_f, b_o = [make_var(n, [1, blocks])
                             for n in ("b_z", "b_f", "b_o")]
            p_f, p_o = [make_var(n, [blocks]) for n in ("p_f", "p_o")]

            block_input = tf.tanh(tf.matmul(inputs, W_z) + tf.matmul(y_prev, R_z) + b_z)
            input_gate = 1  # the input gate is fixed, not learned
            forget_gate = tf.sigmoid(
                tf.matmul(inputs, W_f) + tf.matmul(y_prev, R_f) + tf.mul(c_prev, p_f) + b_f)
            c = tf.mul(input_gate, block_input) + tf.mul(forget_gate, c_prev)
            # The output gate peeks at the *new* cell value.
            output_gate = tf.sigmoid(
                tf.matmul(inputs, W_o) + tf.matmul(y_prev, R_o) + tf.mul(c, p_o) + b_o)
            y = tf.mul(tf.tanh(c), output_gate)

            return y, tf.concat(1, [c, y])
Example #9
0
File: main.py Project: Daiver/jff
def mkDiscriminator(input, weights):
    """Build a three-layer discriminator: two tanh layers and a sigmoid output.

    Args:
        input: tensor fed to the first layer.
        weights: mapping providing 'w1'..'w3' and 'b1'..'b3'.

    Returns:
        The sigmoid activation of the third layer.
    """
    l1 = tf.nn.tanh(tf.matmul(input, weights['w1']) + weights['b1'])
    l2 = tf.nn.tanh(tf.matmul(l1, weights['w2']) + weights['b2'])
    # A fourth layer ('w4'/'b4') used to follow this return and could never
    # execute; it was removed. The returned value is unchanged.
    return tf.sigmoid(tf.matmul(l2, weights['w3']) + weights['b3'])
  def __call__(self, inputs, state, scope=None):
    """JZS1 (mutant 1) recurrent cell with `self._num_units` units.

    Computed on the device "/gpu:<self._gpu_for_layer>". With x_t = inputs and
    h = state, and assuming `linear` is an affine map (TODO confirm):

      z   = sigm(W_xz x_t + b_z)
      r   = sigm(W_xr x_t + W_hr h + b_r)
      h_t = tanh(tanh(x_t) + W_h (r * h) + b_h) * z + h * (1 - z)

    Returns:
      `(h_t, h_t)`: there is only one hidden state to keep track of, so it
      serves as both output and new state.
    """
    with tf.device("/gpu:"+str(self._gpu_for_layer)):
      with tf.variable_scope(scope or type(self).__name__):  # "JZS1Cell"
        with tf.variable_scope("Zinput"):
          # We start with bias of 1.0 to not reset and not update.
          z_pre = linear(
              [inputs], self._num_units, True, 1.0,
              weight_initializer=self._weight_initializer,
              orthogonal_scale_factor=self._orthogonal_scale_factor)
          z = tf.sigmoid(z_pre)

        with tf.variable_scope("Rinput"):
          r_pre = linear(
              [inputs, state], self._num_units, True, 1.0,
              weight_initializer=self._weight_initializer,
              orthogonal_scale_factor=self._orthogonal_scale_factor)
          r = tf.sigmoid(r_pre)

        with tf.variable_scope("Candidate"):
          gated_state = linear([r*state], self._num_units, True)
          candidate = tf.tanh(tf.tanh(inputs) + gated_state)
          from_candidate = candidate*z
          from_state = state*(1 - z)

        h_t = from_candidate + from_state

      return h_t, h_t
  def __call__(self, inputs, state, scope=None):
    """JZS3 recurrent cell with `self._num_units` units.

    Computed on the device "/gpu:<self._gpu_for_layer>". With x_t = inputs and
    h = state, and assuming `linear` is an affine map over its listed
    arguments (TODO confirm):

      z   = sigm(linear([x_t, tanh(h)]))
      r   = sigm(linear([x_t, h]))
      h_t = tanh(linear([h * r, x_t])) * z + h * (1 - z)

    Returns:
      `(h_t, h_t)`: the single hidden state doubles as output and new state.
    """
    with tf.device("/gpu:"+str(self._gpu_for_layer)):
      with tf.variable_scope(scope or type(self).__name__):
        with tf.variable_scope("Zinput"):
          # We start with bias of 1.0 to not reset and not update.
          squashed_state = tf.tanh(state)
          z_pre = linear(
              [inputs, squashed_state], self._num_units, True, 1.0,
              weight_initializer=self._weight_initializer,
              orthogonal_scale_factor=self._orthogonal_scale_factor)
          z = tf.sigmoid(z_pre)

        with tf.variable_scope("Rinput"):
          r_pre = linear(
              [inputs, state], self._num_units, True, 1.0,
              weight_initializer=self._weight_initializer,
              orthogonal_scale_factor=self._orthogonal_scale_factor)
          r = tf.sigmoid(r_pre)

        with tf.variable_scope("Candidate"):
          candidate_pre = linear([state*r, inputs], self._num_units, True)
          from_candidate = (tf.tanh(candidate_pre))*z
          from_state = state*(1 - z)

        h_t = from_candidate + from_state

      return h_t, h_t
Example #12
0
    def unroll(inp, state):
        """Advance a GRU-style recurrence by one step and return the new state.

        The reset gate scales only the recurrent term of the candidate; the
        input gate then interpolates between the old state and the candidate.
        """
        input_gate = tf.sigmoid(tf.matmul(inp, w_xi) + tf.matmul(state, w_hi) + b_i)
        reset_gate = tf.sigmoid(tf.matmul(inp, w_xr) + tf.matmul(state, w_hr) + b_r)
        candidate = tf.tanh(
            tf.matmul(inp, w_xu) + reset_gate * tf.matmul(state, w_hu) + b_u)
        next_state = state * (1 - input_gate) + candidate * input_gate

        return next_state
	def forward_propogation(self):
		"""Build the forward pass of a two-layer sigmoid network.

		Creates a float placeholder for the input, applies `W1`/`b1` and
		`W2`/`b2` with a sigmoid after each layer, and returns the output
		activation tensor.

		The activation ops used to be named "Hidden Activation" and
		"Output Activation"; TensorFlow rejects op names containing spaces,
		so they are now "hidden_activation" and "output_activation".
		"""
		x = tf.placeholder("float")
		z2 = tf.add(tf.matmul(x,self.W1),self.b1)
		a2 = tf.sigmoid(z2, name="hidden_activation")
		z3 = tf.add(tf.matmul(a2,self.W2),self.b2)
		a3 = tf.sigmoid(z3, name="output_activation")
		return a3
Example #14
0
    def add_model(self, inputs1, inputs2, seq_len1, seq_len2):
        """Unroll one shared sigmoid RNN over two input sequences.

        Both sequences start from the same all-zero state of shape
        `(config.batch_size, config.hidden_size)` and share the variables
        `Whh`, `Wxh` and `bhx` created under the 'rnn' scope.

        Args:
            inputs1: indexable collection of per-step tensors, first sequence.
            inputs2: indexable collection of per-step tensors, second sequence.
            seq_len1: not used by this method.
            seq_len2: not used by this method.

        Returns:
            A list of `config.max_steps` tensors; entry `i` is the step-`i`
            states of both sequences concatenated along axis 1.
        """
        # Debug output kept from the original; written as calls so the block
        # also parses on Python 3.
        print('adsf add_model')
        self.initial_state = tf.constant(np.zeros((self.config.batch_size,self.config.hidden_size)), dtype=tf.float32)
        rnn_outputs1 = []
        rnn_outputs2 = []
        h_curr1 = self.initial_state
        h_curr2 = self.initial_state
        print('nthgnghn')
        with tf.variable_scope('rnn'):
            Whh = tf.get_variable('Whh', shape=(self.config.hidden_size,self.config.hidden_size), dtype=tf.float32)
            Wxh = tf.get_variable('Wxh', shape=(self.config.embed_size,self.config.hidden_size),  dtype=tf.float32)
            b1  = tf.get_variable('bhx', shape=(self.config.hidden_size,),                        dtype=tf.float32)
            # get_shape must be called; printing the bare attribute only shows
            # the bound-method repr, not the shape.
            print(Wxh.get_shape())
            print(inputs1[0].get_shape())
            print(inputs2[0].get_shape())
            for i in range(self.config.max_steps):
                h_curr2 = tf.matmul(h_curr2,Whh)
                h_curr2 += tf.matmul(inputs2[i],Wxh)
                h_curr2 += b1
                h_curr2 = tf.sigmoid(h_curr2)

                h_curr1 = tf.sigmoid(tf.matmul(h_curr1,Whh) + tf.matmul(inputs1[i],Wxh) + b1)
                rnn_outputs1.append(h_curr1)
                rnn_outputs2.append(h_curr2)

        # Both lists hold exactly max_steps entries, so pairing them is
        # equivalent to indexing by step.
        rnn_states = [tf.concat(1, [out1, out2])
                      for out1, out2 in zip(rnn_outputs1, rnn_outputs2)]
        return rnn_states
    def __call__(self, inputs, state, scope = None):
        """Run one GRU step.

        Args:
            inputs: input tensor for this step.
            state: previous hidden state.
            scope: optional variable-scope name; defaults to the class name.

        Returns:
            `(new_state, new_state)`: the new hidden state, used as both the
            output and the next state.
        """
        with tf.variable_scope(scope or type(self).__name__):
            with tf.variable_scope("Gates"):
                # Fixed: this previously referenced the undefined name
                # `states`, which raised NameError on every call.
                reset, update = tf.split(
                    1,
                    2,
                    linear(
                        [inputs, state],
                        2 * self._num_units,
                        bias = True,
                        bias_start = 1.0
                    )
                )
                reset, update = tf.sigmoid(reset), tf.sigmoid(update)

            with tf.variable_scope("Candidate"):
                candidate = linear(
                    [inputs, reset * state],
                    self._num_units,
                    bias = True
                )
                candidate = tf.tanh(candidate)

            # Interpolate between the old state and the candidate.
            new_state = update * state + (1 - update) * candidate

            return new_state, new_state
Example #16
0
  def __call__(self, x, state, scope=None):
    """Run one LSTM step with separately initialised input/recurrent weights.

    `state` is split in half along axis 1 into `(c, h)`. The input weights use
    the default initializer and the recurrent weights use
    `lstm_ortho_initializer(1.0)`; the two are stacked into one matrix so a
    single matmul produces all four gate pre-activations.

    Returns:
      `(new_h, new_state)` where `new_state` is `[new_c, new_h]` joined along
      axis 1 (a concatenated tensor, not a tuple).
    """
    with tf.variable_scope(scope or type(self).__name__):
      c, h = tf.split(state, 2, 1)

      x_size = x.get_shape().as_list()[1]
      gates_width = 4 * self.num_units

      input_init = None  # falls back to the scope's default initializer
      recurrent_init = lstm_ortho_initializer(1.0)

      # W_xh and W_hh stay separate variables so each gets its own init.
      w_xh = tf.get_variable(
          'W_xh', [x_size, gates_width], initializer=input_init)
      w_hh = tf.get_variable(
          'W_hh', [self.num_units, gates_width], initializer=recurrent_init)
      bias = tf.get_variable(
          'bias', [gates_width],
          initializer=tf.constant_initializer(0.0))

      joined_inputs = tf.concat([x, h], 1)
      joined_weights = tf.concat([w_xh, w_hh], 0)
      hidden = tf.matmul(joined_inputs, joined_weights) + bias

      # Slice order: input gate, new input, forget gate, output gate.
      i, j, f, o = tf.split(hidden, 4, 1)

      g = tf.tanh(j)
      if self.use_recurrent_dropout:
        g = tf.nn.dropout(g, self.dropout_keep_prob)

      new_c = c * tf.sigmoid(f + self.forget_bias) + tf.sigmoid(i) * g
      new_h = tf.tanh(new_c) * tf.sigmoid(o)

      return new_h, tf.concat([new_c, new_h], 1)
  def __call__(self, inputs, state, timestep=0, scope=None):
    """Pass the state through `self.num_highway_layers` gated highway layers.

    Each layer computes a tanh "highway factor" and a sigmoid gate, then mixes
    the factor with the current state: `factor * gate + state * (1 - gate)`.
    The first layer (and every layer when `self.use_inputs_on_each_layer` is
    set) sees `inputs` through `multiplicative_integration`; other layers use
    `linear` on the current state alone. `timestep` and `scope` are accepted
    but not used here.

    Returns:
      `(current_state, current_state)` after the last layer.
    """
    current_state = state
    for layer_idx in xrange(self.num_highway_layers):
      layer_tag = str(layer_idx)
      sees_inputs = self.use_inputs_on_each_layer or layer_idx == 0

      with tf.variable_scope('highway_factor_'+layer_tag):
        if sees_inputs:
          factor_pre = multiplicative_integration(
              [inputs, current_state], self._num_units)
        else:
          factor_pre = linear([current_state], self._num_units, True)
        highway_factor = tf.tanh(factor_pre)

      with tf.variable_scope('gate_for_highway_factor_'+layer_tag):
        # -3.0 is passed as the gate's initial bias value.
        if sees_inputs:
          gate_pre = multiplicative_integration(
              [inputs, current_state], self._num_units, initial_bias_value = -3.0)
        else:
          gate_pre = linear([current_state], self._num_units, True, -3.0)
        highway_gate = tf.sigmoid(gate_pre)

        carry_gate = 1 - highway_gate

        if self.use_recurrent_dropout and self.is_training:
          highway_factor = tf.nn.dropout(highway_factor, self.recurrent_dropout_factor)

      current_state = highway_factor * highway_gate + current_state * carry_gate

    return current_state, current_state
def make_tf_top(x_shape, loss='sigmoid_ce'):
  """Build the top layer, i.e. the loss layer.

  Args:
    x_shape: shape used for both the 'input' and the 'output' placeholder.
    loss: one of 'sigmoid_ce', 'softmax_ce', 'sigmoid_l2' or 'l2'.

  Returns:
    A tuple `(L, dx, summary, accuracy)`: the loss tensor, its gradient with
    respect to the input placeholder, the merged summary op, and the accuracy
    tensor (`None` for the two L2 losses).

  Raises:
    ValueError: if `loss` is not one of the supported names. Previously an
      unknown name fell through every branch and failed later with an
      UnboundLocalError on `L`.
  """
  supported_losses = ('sigmoid_ce', 'softmax_ce', 'sigmoid_l2', 'l2')
  # Validate before touching the graph so a typo does not leave stray
  # placeholders behind.
  if loss not in supported_losses:
    raise ValueError(
        "Unsupported loss %r; expected one of %s" % (loss, supported_losses))

  with tf.name_scope('top') as scope:
    x = tf.placeholder(tf.float32, shape=x_shape, name='input')
    y = tf.placeholder(tf.float32, shape=x_shape, name='output')

    # NOTE(review): the cross-entropy ops are called positionally as (x, y);
    # newer TensorFlow releases require `labels=`/`logits=` keywords --
    # confirm against the TensorFlow version in use.
    if loss=='sigmoid_ce':
      L = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(x, y))
      correct_prediction = tf.equal(tf.round( tf.sigmoid(x) ), tf.round( y ))
      accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
      accuracy_summary = [tf.summary.scalar('accuracy', accuracy)]
    elif loss=='softmax_ce':
      L = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(x, y))
      correct_prediction = tf.equal(tf.argmax( tf.nn.softmax(x), 1 ), tf.argmax( y, 1 ))
      accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
      accuracy_summary = [tf.summary.scalar('accuracy', accuracy)]
    elif loss=='sigmoid_l2':
      L = tf.nn.l2_loss(tf.sigmoid(x) - y)
      accuracy = None
      accuracy_summary = []
    else:  # loss == 'l2', guaranteed by the validation above
      L = tf.nn.l2_loss(x - y)
      accuracy = None
      accuracy_summary = []

    loss_summary = tf.summary.scalar('log_loss', tf.log(L))
    dx = tf.gradients(L, x)[0]

    return L, dx, tf.summary.merge([loss_summary] + accuracy_summary), accuracy
Example #19
0
  def __call__(self, x, state, timestep=0, scope=None):
    """Run one step of a layer-normalised LSTM without a bias term.

    `state` is split in half along axis 1 into `(h, c)` -- hidden part first.
    The fused gate pre-activations go through `layer_norm_all` before being
    split, and the new cell value goes through `layer_norm` before the output
    tanh. `timestep` is accepted but not used here.

    Returns:
      `(new_h, new_state)` where `new_state` is `[new_h, new_c]` joined along
      axis 1.
    """
    with tf.variable_scope(scope or type(self).__name__):
      h, c = tf.split(state, 2, 1)

      units = self.num_units
      x_dims = x.get_shape().as_list()
      x_size = x_dims[1]
      batch_size = x_dims[0]

      input_init = None  # falls back to the scope's default initializer
      recurrent_init = lstm_ortho_initializer(1.0)

      w_xh = tf.get_variable(
          'W_xh', [x_size, 4 * units], initializer=input_init)
      w_hh = tf.get_variable(
          'W_hh', [units, 4 * units], initializer=recurrent_init)

      # One matmul over the stacked weights; no bias is added.
      joined_inputs = tf.concat([x, h], 1)
      joined_weights = tf.concat([w_xh, w_hh], 0)
      gates = tf.matmul(joined_inputs, joined_weights)

      gates = layer_norm_all(gates, batch_size, 4, units, 'ln_all')
      # Slice order: input gate, new input, forget gate, output gate.
      i, j, f, o = tf.split(gates, 4, 1)

      g = tf.tanh(j)
      if self.use_recurrent_dropout:
        g = tf.nn.dropout(g, self.dropout_keep_prob)

      new_c = c * tf.sigmoid(f + self.forget_bias) + tf.sigmoid(i) * g
      new_h = tf.tanh(layer_norm(new_c, units, 'ln_c')) * tf.sigmoid(o)

    return new_h, tf.concat([new_h, new_c], 1)
Example #20
0
  def __call__(self, x_placeholder, h_prev, C_prev):
    """Run one LSTM step on embedded inputs.

    Looks up the existing 'embedding' and 'weight' variables under
    `self.scope` (with reuse), embeds `x_placeholder`, applies dropout to the
    embedding while training, and computes the gates with one matmul. No bias
    is added.

    Returns:
      `(h, C)`: the new hidden state and the new cell state.
    """
    with tf.variable_scope(self.scope, reuse=True):
      embedding = tf.get_variable('embedding')
      W = tf.get_variable('weight')

    embedded = tf.nn.embedding_lookup(embedding, x_placeholder)

    # Dropout is applied to the embedded input only, and only in training.
    if self.is_training:
      embedded = tf.nn.dropout(embedded, self.keep_prob)

    # All four gate pre-activations come from a single matmul.
    gate_input = tf.concat(1, [h_prev, embedded])
    gates = tf.matmul(gate_input, W)
    # Slice order: forget gate, input gate, cell update, output gate.
    forget_pre, input_pre, update_pre, output_pre = tf.split(1, 4, gates)

    forget_gate = tf.sigmoid(forget_pre)
    input_gate = tf.sigmoid(input_pre)
    output_gate = tf.sigmoid(output_pre)
    cell_update = tf.tanh(update_pre)

    # New cell: keep part of the old cell and add the gated update.
    C = tf.mul(forget_gate, C_prev) + tf.mul(input_gate, cell_update)

    h = tf.mul(output_gate, tf.tanh(C))
    return h, C
Example #21
0
def lstm_cell(x, h, c, name=None, reuse=False):
  """Compute the LSTM hidden state and cell content for one timestep.

  Args:
    x: input tensor; its last dimension sets the input kernel's row count.
    h: previous hidden state; its last dimension sets the number of units.
    c: previous cell content.
    name: optional variable-scope name (default name "lstm").
    reuse: forwarded to `tf.variable_scope`.

  Returns:
    `(h, c)`: the new hidden state and cell content.
  """
  in_dim = x.shape[-1].value
  units = h.shape[-1].value
  with tf.variable_scope(name, default_name="lstm",
                         values=[x, h, c], reuse=reuse):
    input_kernel = tf.get_variable(
        "kernel/input", [in_dim, units * 4],
        dtype=tf.float32,
        initializer=tf.orthogonal_initializer(1.0))
    hidden_kernel = tf.get_variable(
        "kernel/hidden", [units, units * 4],
        dtype=tf.float32,
        initializer=tf.orthogonal_initializer(1.0))
    bias = tf.get_variable(
        "bias", [units * 4],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0.0))

  pre_act = tf.matmul(x, input_kernel) + tf.matmul(h, hidden_kernel) + bias
  # Slice order: input gate, forget gate, output gate, update.
  in_pre, forget_pre, out_pre, update_pre = tf.split(pre_act, 4, axis=1)
  in_gate = tf.sigmoid(in_pre)
  # A constant +1.0 is added to the forget gate's pre-activation.
  forget_gate = tf.sigmoid(forget_pre + 1.0)
  out_gate = tf.sigmoid(out_pre)
  update = tf.tanh(update_pre)
  new_c = forget_gate * c + in_gate * update
  new_h = out_gate * tf.tanh(new_c)
  return new_h, new_c
Example #22
0
    def build_node(self, x_in, c_in, h_in, scope="lstm_cell"):
        """Build one LSTM cell whose biases are folded into the weights.

        `x_in` and `h_in` are concatenated along axis 2 and prefixed with a
        constant-ones slice so that each `batch_matmul` with `self.w_f`,
        `self.w_i`, `self.w_c` and `self.w_o` also applies a bias row.
        `batch_size` is read from the enclosing scope.

        Returns:
            `(c, h)`: the new cell state first, then the new output.
        """
        with tf.variable_scope(scope):
            joined = tf.concat(2, [x_in, h_in])

            bias_ones = tf.constant(np.ones([batch_size,1,1]), name="b", dtype=tf.float32)
            augmented = tf.concat(2, [bias_ones, joined])

            # Forget gate.
            forget_gate = tf.sigmoid(tf.batch_matmul(augmented, self.w_f))

            # Input gate and candidate cell values.
            input_gate = tf.sigmoid(tf.batch_matmul(augmented, self.w_i))
            candidate = tf.tanh(tf.batch_matmul(augmented, self.w_c))

            # New cell state: retained old values plus scaled candidates.
            retained = tf.mul(forget_gate, c_in)
            admitted = tf.mul(input_gate, candidate)
            c = tf.add(retained, admitted)

            # New output, scaled by the output gate.
            output_gate = tf.sigmoid(tf.batch_matmul(augmented, self.w_o))
            h = tf.mul(output_gate, tf.tanh(c))
            return (c, h)
    def __call__(self, inputs, state, scope=None):
        """Long short-term memory cell (LSTM).

        Args:
            inputs: input tensor for this step.
            state: a `(c, h)` pair when `self._state_is_tuple` is set,
                otherwise one tensor split in half along axis 1 into `c`, `h`.
            scope: optional variable-scope name; defaults to "basic_lstm_cell".

        Returns:
            `(h, new_state)` where `new_state` is an `LSTMStateTuple(c, h)` or
            the concatenation `[c, h]` along axis 1, mirroring the input form.
        """
        # Fixed: the cell object itself used to be passed as the first
        # argument of tf.variable_scope (name_or_scope), which only accepts a
        # string or a VariableScope and otherwise raises TypeError.
        with tf.variable_scope(scope or "basic_lstm_cell", reuse=self._reuse):
            # Parameters of gates are concatenated into one multiply for
            # efficiency.
            if self._state_is_tuple:
                c_prev, h_prev = state
            else:
                c_prev, h_prev = tf.split(
                    value=state, num_or_size_splits=2, axis=1)
            concat = tf.contrib.rnn._linear(
                [inputs, h_prev], 4 * self._num_units, True)

            # i = input_gate, g = new_input, f = forget_gate, o = output_gate
            i, g, f, o = tf.split(value=concat, num_or_size_splits=4, axis=1)

            c = (c_prev * tf.sigmoid(f + self._forget_bias) +
                 tf.sigmoid(i) * tf.tanh(g))
            h = tf.tanh(c) * tf.sigmoid(o)

            if self._state_is_tuple:
                new_state = LSTMStateTuple(c, h)
            else:
                new_state = tf.concat([c, h], 1)
            return h, new_state
Example #24
0
    def build_losses(self, logits_real, logits_fake):
        """Build the discriminator and generator losses of the GAN minimax game.

          min_G max_D V(D, G) = E_{x ~ p_data} [log D(x)] + E_{z ~ p_fake} [log (1 - D(G(z)))]

        Sets `self.d_loss` (the average of the real and fake cross-entropies)
        and `self.g_loss` (cross-entropy of the fake logits against all-ones
        labels), records score histograms, and registers both losses and both
        accuracies with `add_moving_summary`.

        Args:
            logits_real (tf.Tensor): discrim logits from real samples
            logits_fake (tf.Tensor): discrim logits from fake samples produced by generator
        """
        def mean_xent(logits, labels, name):
            # Mean sigmoid cross-entropy of `logits` against `labels`.
            return tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels),
                name=name)

        def mean_hit_rate(hits, name):
            # Fraction of True entries in a boolean tensor.
            return tf.reduce_mean(tf.cast(hits, tf.float32), name=name)

        with tf.name_scope("GAN_loss"):
            score_real = tf.sigmoid(logits_real)
            score_fake = tf.sigmoid(logits_fake)
            tf.summary.histogram('score-real', score_real)
            tf.summary.histogram('score-fake', score_fake)

            with tf.name_scope("discrim"):
                d_loss_pos = mean_xent(
                    logits_real, tf.ones_like(logits_real), 'loss_real')
                d_loss_neg = mean_xent(
                    logits_fake, tf.zeros_like(logits_fake), 'loss_fake')

                d_pos_acc = mean_hit_rate(score_real > 0.5, 'accuracy_real')
                d_neg_acc = mean_hit_rate(score_fake < 0.5, 'accuracy_fake')

                # Real and fake halves are weighted equally.
                d_accuracy = tf.add(.5 * d_pos_acc, .5 * d_neg_acc, name='accuracy')
                self.d_loss = tf.add(.5 * d_loss_pos, .5 * d_loss_neg, name='loss')

            with tf.name_scope("gen"):
                self.g_loss = mean_xent(
                    logits_fake, tf.ones_like(logits_fake), 'loss')
                g_accuracy = mean_hit_rate(score_fake > 0.5, 'accuracy')

            add_moving_summary(self.g_loss, self.d_loss, d_accuracy, g_accuracy)
  def __call__(self, inputs, state, scope=None):
    """JZS2 recurrent cell with `self._num_units` units.

    Computed on the device "/gpu:<self._gpu_for_layer>". With x_t = inputs and
    h = state, and assuming `linear.linear` is an affine map over its listed
    arguments (TODO confirm):

      z   = sigm(linear([x_t, h]))
      r   = sigm(x_t + linear([h]))
      h_t = tanh(linear([h * r, x_t])) * z + h * (1 - z)

    Returns:
      `(h_t, h_t)`: the single hidden state doubles as output and new state.
    """
    with tf.device("/gpu:"+str(self._gpu_for_layer)):
      with tf.variable_scope(scope or type(self).__name__):
        with tf.variable_scope("Zinput"):
          z_pre = linear.linear([inputs, state], self._num_units, True, 1.0)
          z = tf.sigmoid(z_pre)

        with tf.variable_scope("Rinput"):
          # `inputs` is added directly, without a projection of its own.
          state_proj = linear.linear([state], self._num_units, True, 1.0)
          r = tf.sigmoid(inputs+(state_proj))

        with tf.variable_scope("Candidate"):
          candidate_pre = linear.linear([state*r, inputs], self._num_units, True)
          from_candidate = (tf.tanh(candidate_pre))*z
          from_state = state*(1 - z)

        h_t = from_candidate + from_state

      return h_t, h_t
 def lstm_cell(i, o, state):
   """
   Create a LSTM cell. See e.g.: http://arxiv.org/pdf/1402.1128v1.pdf
   Note that in this formulation, we omit the various connections between the
   previous state and the gates.

   All four gate pre-activations are produced by two batched matmuls against
   the stacked parameters `fico_x`, `fico_m` and `fico_b`. Along the leading
   axis the slices are used as: 0 = forget gate, 1 = input gate,
   2 = update, 3 = output gate.

   Returns `(h, state)`: the new output and the new cell state.
   """
   # The same operand is repeated four times, once per stacked gate.
   stacked_inputs = tf.pack([i] * 4)
   stacked_outputs = tf.pack([o] * 4)

   from_inputs = tf.batch_matmul(stacked_inputs, fico_x)
   from_outputs = tf.batch_matmul(stacked_outputs, fico_m)

   pre_act = from_inputs + from_outputs + fico_b

   forget_gate = tf.sigmoid(pre_act[0, :, :])
   input_gate = tf.sigmoid(pre_act[1, :, :])
   update = tf.tanh(pre_act[2, :, :])

   new_state = forget_gate * state + input_gate * update

   output_gate = tf.sigmoid(pre_act[3, :, :])

   new_output = output_gate * tf.tanh(new_state)
   return new_output, new_state
Example #27
0
  def __call__(self, inputs, state, scope=None):
    """LSTM step that collects one forget gate per child state.

    `state` is split in half along axis 1 into `(c, h)`. One forget-gate
    pre-activation is gathered for every entry of `child_states`; with no
    children the cell's own forget gate is used, which makes it behave like a
    regular LSTM.

    Returns:
      `(new_h, new_state)` where `new_state` is `[new_c, new_h]` joined along
      axis 1.
    """
    with tf.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
      # Parameters of gates are concatenated into one multiply for efficiency.
      c, h = tf.split(1, 2, state)
      concat = linear.linear([inputs, h], 4 * self._num_units, True)

      fs = []

      # This can be made more efficient since we're doing more than needs to be
      # done, but for now w/e
      # NOTE(review): `child_states` is neither a parameter nor defined in
      # this method -- it must come from an enclosing/global scope; confirm.
      # NOTE(review): `concat` is overwritten inside the loop, so when there
      # are children the i/j/o gates below come from the LAST child's hidden
      # state, not from `h` -- confirm this is intended.
      for child_state in child_states:
          c_k, h_k = tf.split(1, 2, child_state)
          concat = linear.linear([inputs, h_k], 4 * self._num_units, True)
          i_k, j_k, f_k, o_k = tf.split(1, 4, concat)
          fs.append(f_k)


      # i = input_gate, j = new_input, f = forget_gate, o = output_gate
      # TODO: forget gate for each child, probably need to split by number
      # of child states or something
      i, j, f, o = tf.split(1, 4, concat)

      # If no children just treat it like a regular lstm
      if not fs:
        fs.append(f)

      # Fixed: `fs` is a Python list, so `fs + self._forget_bias` raised
      # TypeError on every call. The bias is now added to each forget gate
      # individually and the gated cell terms are summed.
      retained = sum(c * tf.sigmoid(f_k + self._forget_bias) for f_k in fs)
      new_c = retained + tf.sigmoid(i) * tf.tanh(j)
      new_h = tf.tanh(new_c) * tf.sigmoid(o)

    return new_h, tf.concat(1, [new_c, new_h])
Example #28
0
  def __call__(self, inputs, state, scope=None):
    """Gated recurrent unit (GRU) with fixed dropout masks on input and state.

    `self._dropMaskInput` and `self._dropMaskState` are multiplied into
    `inputs` and `state` before any gate is computed. Apart from the leading
    dimension, each mask must have the same static shape as the tensor it
    masks; on a mismatch the shapes are printed and an assertion fails.

    Returns:
      `(new_h, new_h)`: the new hidden state as both output and state.
    """
    with vs.variable_scope(scope or type(self).__name__):
      input_mask_shape = self._dropMaskInput.get_shape()[1:]
      input_shape = inputs.get_shape()[1:]
      if input_mask_shape != input_shape:
        print("error: "+str(input_mask_shape)+" != "+str(input_shape))
        assert False

      state_mask_shape = self._dropMaskState.get_shape()[1:]
      state_shape = state.get_shape()[1:]
      if state_mask_shape != state_shape:
        print("error: "+str(state_mask_shape)+" != "+str(state_shape))
        assert False

      masked_inputs = tf.mul(self._dropMaskInput, inputs)
      masked_state = tf.mul(self._dropMaskState, state)

      with vs.variable_scope("Gates"):  # Reset gate and update gate.
        # We start with bias of 1.0 to not reset and not update.
        gates = rnn_cell._linear(
            [masked_inputs, masked_state], 2 * self._num_units, True, 1.0)
        reset, update = tf.split(1, 2, gates)
        reset, update = tf.sigmoid(reset), tf.sigmoid(update)

      with vs.variable_scope("Candidate"):
        candidate = self._activation(
            rnn_cell._linear(
                [masked_inputs, reset * masked_state], self._num_units, True))

      # The masked state, not the raw one, is what gets carried over.
      new_h = update * masked_state + (1 - update) * candidate

    return new_h, new_h
Example #29
0
  def _compute_loss(self, prediction_tensor, target_tensor, weights):
    """Compute the bootstrapped sigmoid cross-entropy loss.

    The training target is a blend of the given target and the model's own
    prediction: `alpha * target + (1 - alpha) * own`, where `own` is the
    sigmoid probability for the 'soft' bootstrap type and that probability
    thresholded at 0.5 for any other type.

    Args:
      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
        num_classes] representing the predicted logits for each class
      target_tensor: A float tensor of shape [batch_size, num_anchors,
        num_classes] representing one-hot encoded classification targets
      weights: a float tensor of shape, either [batch_size, num_anchors,
        num_classes] or [batch_size, num_anchors, 1]. If the shape is
        [batch_size, num_anchors, 1], all the classes are equally weighted.

    Returns:
      loss: a float tensor of shape [batch_size, num_anchors, num_classes]
        representing the value of the loss function.
    """
    from_target = self._alpha * target_tensor
    own_weight = 1.0 - self._alpha
    probabilities = tf.sigmoid(prediction_tensor)
    if self._bootstrap_type == 'soft':
      own_prediction = probabilities
    else:
      # Hard bootstrap: use the thresholded prediction as a 0/1 target.
      own_prediction = tf.cast(probabilities > 0.5, tf.float32)
    bootstrap_target_tensor = from_target + own_weight * own_prediction

    per_entry_cross_ent = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=bootstrap_target_tensor, logits=prediction_tensor)
    return per_entry_cross_ent * weights
  def call(self, x, h):
    """Run one step of a convolutional GRU.

    Args:
      x: input tensor; its size along `self._feature_axis` sets the number of
        input channels.
      h: previous hidden state.

    Returns:
      `(h, h)`: the new hidden state as both output and state.
    """
    channels = x.shape[self._feature_axis].value

    with tf.variable_scope('gates'):
      inputs = tf.concat([x, h], axis=self._feature_axis)
      n = channels + self._filters
      # Two gate maps (reset and update) per filter, never fewer than 2.
      m = 2 * self._filters if self._filters > 1 else 2
      W = tf.get_variable('kernel', self._kernel + [n, m])
      y = tf.nn.convolution(inputs, W, 'SAME', data_format=self._data_format)
      if self._normalize:
        # With layer norm each gate is normalised separately; no bias is used.
        r, u = tf.split(y, 2, axis=self._feature_axis)
        r = tf.contrib.layers.layer_norm(r)
        u = tf.contrib.layers.layer_norm(u)
      else:
        # Bias initialised to ones.
        y += tf.get_variable('bias', [m], initializer=tf.ones_initializer())
        r, u = tf.split(y, 2, axis=self._feature_axis)
      r, u = tf.sigmoid(r), tf.sigmoid(u)

      # TODO
      #tf.summary.histogram('reset_gate', r)
      #tf.summary.histogram('update_gate', u)

    with tf.variable_scope('candidate'):
      inputs = tf.concat([x, r * h], axis=self._feature_axis)
      n = channels + self._filters
      m = self._filters
      W = tf.get_variable('kernel', self._kernel + [n, m])
      y = tf.nn.convolution(inputs, W, 'SAME', data_format=self._data_format)
      if self._normalize:
        y = tf.contrib.layers.layer_norm(y)
      else:
        y += tf.get_variable('bias', [m], initializer=tf.zeros_initializer())
      h = u * h + (1 - u) * self._activation(y)

    # Fixed: this line was indented with a tab inside a space-indented body,
    # which made the whole method fail to parse.
    return h, h
    def __init__(self, itemNum, userNum, emb_dim, lamda, param=None, initdelta=0.05, learning_rate=0.05):
        """Build the discriminator graph: embeddings, a one-hidden-layer scorer, loss and ranking outputs.

        Args:
            itemNum: number of items (rows of the item embedding table).
            userNum: number of users (rows of the user embedding table).
            emb_dim: width of each embedding vector.
            lamda: coefficient of the L2 regularisation term in `pre_loss`.
            param: optional `[user_embeddings, item_embeddings, item_bias]`
                initial values; when None the tables are randomly initialised.
            initdelta: half-width of the uniform range used for random
                embedding initialisation.
            learning_rate: step size of the gradient-descent optimizer.
        """
        self.itemNum = itemNum
        self.userNum = userNum
        self.hidden_num_units = 30
        self.emb_dim = emb_dim
        self.lamda = lamda  # regularization parameters
        self.param = param
        self.initdelta = initdelta
        self.learning_rate = learning_rate

        with tf.variable_scope('discriminator'):
            if self.param is None:
                self.user_embeddings = tf.Variable(
                    tf.random_uniform([self.userNum, self.emb_dim], minval=-self.initdelta, maxval=self.initdelta,
                                      dtype=tf.float32))
                self.item_embeddings = tf.Variable(
                    tf.random_uniform([self.itemNum, self.emb_dim], minval=-self.initdelta, maxval=self.initdelta,
                                      dtype=tf.float32))
                self.item_bias = tf.Variable(tf.zeros([self.itemNum]))
            else:
                self.user_embeddings = tf.Variable(self.param[0])
                self.item_embeddings = tf.Variable(self.param[1])
                self.item_bias = tf.Variable(self.param[2])

        # NOTE(review): the scorer weights/biases created below are not part of
        # d_params, so `d_updates` never trains them -- confirm this is intended.
        self.d_params = [self.user_embeddings, self.item_embeddings, self.item_bias]

        # placeholder definition
        self.u = tf.placeholder(tf.int32)
        self.i = tf.placeholder(tf.int32)
        self.label = tf.placeholder(tf.float32)

        self.u_embedding = tf.nn.embedding_lookup(self.user_embeddings, self.u)
        self.i_embedding = tf.nn.embedding_lookup(self.item_embeddings, self.i)
        # Restored: the regulariser in `pre_loss` reads self.i_bias, which was
        # previously commented out and therefore raised AttributeError.
        self.i_bias = tf.gather(self.item_bias, self.i)

        self.input_embedding = tf.concat([self.u_embedding, self.i_embedding], 1)

        # `seed` is not defined in this method; it is expected to be a
        # module-level name.
        weights = {
            'hidden': tf.Variable(tf.random_normal([2*self.emb_dim, self.hidden_num_units], seed=seed)),
            'output': tf.Variable(tf.random_normal([self.hidden_num_units, 1], seed=seed))
        }

        biases = {
            'hidden': tf.Variable(tf.random_normal([self.hidden_num_units], seed=seed)),
            'output': tf.Variable(tf.random_normal([1], seed=seed))
        }

        def score(features):
            """Score rows of width 2 * emb_dim with the ReLU hidden layer and linear output layer."""
            hidden = tf.nn.relu(tf.add(tf.matmul(features, weights['hidden']), biases['hidden']))
            return tf.matmul(hidden, weights['output']) + biases['output']

        # NOTE(review): score() returns one column per row ([rows, 1]); if
        # `label` is fed as a flat vector the cross-entropy below broadcasts
        # -- confirm the fed label shape.
        self.pre_logits = score(self.input_embedding)

        self.pre_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=self.label,
                                                                logits=self.pre_logits) + self.lamda * (
            tf.nn.l2_loss(self.u_embedding) + tf.nn.l2_loss(self.i_embedding) + tf.nn.l2_loss(self.i_bias)
        )

        d_opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        self.d_updates = d_opt.minimize(self.pre_loss, var_list=self.d_params)

        self.reward_logits = score(self.input_embedding)
        self.reward = 2 * (tf.sigmoid(self.reward_logits) - 0.5)

        # for test stage, self.u: [batch_size]
        # Pair every looked-up user with every item. The previous nested list
        # comprehension iterated over tensors and passed the item row as the
        # `axis` argument of tf.concat, so the graph could not be built.
        # Rows are ordered user-major: row = user_index * itemNum + item_index.
        # NOTE(review): user-major order is chosen to match the [batch, itemNum]
        # layout of the earlier dot-product `all_rating`; confirm with callers.
        n_users = tf.shape(self.u_embedding)[0]
        users_tiled = tf.tile(tf.expand_dims(self.u_embedding, 1), [1, self.itemNum, 1])
        items_tiled = tf.tile(tf.expand_dims(self.item_embeddings, 0), tf.stack([n_users, 1, 1]))
        self.all_pairs = tf.reshape(tf.concat([users_tiled, items_tiled], 2), [-1, 2 * self.emb_dim])

        pair_scores = score(self.all_pairs)

        # One row of item scores per user.
        self.all_rating = tf.reshape(pair_scores, [-1, self.itemNum])

        self.all_logits = pair_scores
        # The logits are flattened before the softmax, so self.i indexes into
        # the flattened (user-major) score vector.
        self.NLL = -tf.reduce_mean(tf.log(
            tf.gather(tf.reshape(tf.nn.softmax(tf.reshape(self.all_logits, [1, -1])), [-1]), self.i))
        )

        # for dns sample
        self.dns_rating = pair_scores
Example #32
0
import tensorflow as tf
import numpy as np

# Fix the graph-level random seed so the random initial values of W and b are
# reproducible from run to run.
tf.set_random_seed(777)

# XOR truth table: two binary inputs and their exclusive-or as the label.
x_data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
y_data = np.array([[0], [1], [1], [0]], dtype=np.float32)

X = tf.placeholder(tf.float32, shape=[None, 2])
Y = tf.placeholder(tf.float32, shape=[None, 1])

W = tf.Variable(tf.random_normal([2, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis using sigmoid; equivalent to
# tf.div(1., 1. + tf.exp(-(tf.matmul(X, W) + b)))
hypothesis = tf.sigmoid(tf.matmul(X, W) + b)

# cost/loss function: binary cross-entropy. For logistic regression the cost
# carries a leading minus sign (negative mean log-likelihood).
cost = -tf.reduce_mean(Y * tf.log(hypothesis) +
                       (1 - Y) * tf.log(1 - hypothesis))

train = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

# Accuracy computation
# True if hypothesis > 0.5 else False
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

with tf.Session() as sess:

    # Variables must be initialised before any op that reads them is run.
    sess.run(tf.global_variables_initializer())
Example #33
0
    def model_fn(features, labels, mode, params):
        """Build the EstimatorSpec for training, evaluating or predicting with the DKT model.

        Args:
            features: dict holding 'inputs' and 'seq_len'; outside prediction
                mode it must also hold 'target_id' and 'target_correct'.
            labels: not read here; targets are taken from `features`.
            mode: a tf.estimator.ModeKeys value selecting which spec to build.
            params: hyper-parameters forwarded to DKT and to the train-op builder.

        Returns:
            The tf.estimator.EstimatorSpec for the requested mode.
        """
        mode_keys = tf.estimator.ModeKeys
        inputs = features['inputs']
        seq_steps = features['seq_len']
        is_training = (mode == mode_keys.TRAIN)

        try:
            batch_size, length = get_shape_list(inputs, expected_rank=2)
        except ValueError:
            # Fall back to a rank-1 input and treat it as a batch of one.
            batch_size = 1
            length = get_shape_list(inputs, expected_rank=1)[0]
            inputs = tf.reshape(inputs, [batch_size, length])

        with tf.variable_scope('model'):
            # Build model
            model = DKT(params, is_training)
            logits = model(batch_size, inputs, seq_steps)  # [batch, length, vocab_size]

            # In prediction mode there is no target; the sigmoid of the model
            # output is the prediction.
            if mode == mode_keys.PREDICT:
                export_outputs = {
                    'predict_output':
                    tf.estimator.export.PredictOutput(
                        {"predict": tf.sigmoid(logits)})
                }
                return tf.estimator.EstimatorSpec(
                    mode=mode,
                    predictions={'predict': tf.sigmoid(logits)},
                    export_outputs=export_outputs)

            # Training and evaluation share the loss computation.
            target_ids = features['target_id']
            target_correct = features['target_correct']
            loss = dkt_loss(logits, target_correct, target_ids, seq_steps)
            record_dict = {'minibatch_loss': loss}
            # Expose the loss under a fixed tensor name so a logging hook can
            # find it.
            tf.identity(loss, 'cross_entropy')

            if mode == mode_keys.EVAL:
                eval_metrics = get_eval_metrics(logits, target_correct,
                                                target_ids, seq_steps)
                record_dict['accuracy'] = eval_metrics['accuracy']
                record_scalars(record_dict)
                return tf.estimator.EstimatorSpec(
                    mode=mode_keys.EVAL,
                    loss=loss,
                    predictions={'predict': tf.sigmoid(logits)},
                    eval_metric_ops=eval_metrics)

            # Training: log every trainable variable, flagging those listed as
            # restored from a checkpoint (the listing is empty here, so none
            # are flagged).
            restored_names = {}
            tf.logging.info("**** Trainable Variables ****")
            for variable in tf.trainable_variables():
                suffix = ", *INIT_FROM_CKPT*" if variable.name in restored_names else ""
                tf.logging.info("  name = %s, shape = %s%s", variable.name,
                                variable.shape, suffix)

            train_op, train_metrics = get_train_op_and_metrics(loss, params)
            accuracy_metrics = get_eval_metrics(logits, target_correct,
                                                target_ids, seq_steps)
            record_dict['accuracy'] = accuracy_metrics['accuracy']
            record_dict['learning_rate'] = train_metrics['learning_rate']
            record_scalars(record_dict)
            return tf.estimator.EstimatorSpec(
                mode=mode_keys.TRAIN,
                loss=loss,
                train_op=train_op)
    def feedback_block1(self, inputs, state=None, var_list=None):
        """Apply one convolutional LSTM step with element-wise cell-state terms in the gates.

        Args:
            inputs: tensor fed to the input-side convolutions.
            state: optional dict with 'hidden_state' and 'cell_state' from the
                previous step; when None the recurrent and cell-state terms
                are replaced by a scalar zero.
            var_list: optional list; every variable used by this block that is
                not already in it is appended in place.

        Returns:
            dict with the new 'hidden_state' and 'cell_state'.
        """
        prev_hidden = state['hidden_state'] if state is not None else None
        prev_cell = state['cell_state'] if state is not None else None

        assert (prev_cell is None) == (prev_hidden is None), 'cell_state and hidden_state must BOTH be supplied as arguments.'

        def conv(tensor, kernel):
            """Stride-1, SAME-padded 2-D convolution."""
            return tf.nn.conv2d(tensor, kernel, [1, 1, 1, 1], padding='SAME')

        def recurrent_term(kernel):
            """Convolve the previous hidden state, or contribute zero when there is none."""
            if prev_hidden is None:
                return tf.to_float(0)
            return conv(prev_hidden, kernel)

        def cell_term(factor, op_name):
            """Multiply the previous cell state element-wise by `factor`, or contribute zero."""
            if prev_cell is None:
                return tf.to_float(0)
            return tf.multiply(prev_cell, factor, name=op_name)

        # Variables are fetched (the scope is opened with reuse=True) and later
        # appended to var_list in exactly this order.
        variable_names = ('W_xf', 'W_xi', 'W_xc', 'W_xo',
                          'W_hf', 'W_hi', 'W_hc', 'W_ho',
                          'W_cf', 'W_ci', 'W_co',
                          'b_f', 'b_i', 'b_c', 'b_o')

        with tf.variable_scope('vfeedbacknet_{}'.format(Model.model_name), reuse=True):
            with tf.variable_scope('feedback_block1'):
                block_vars = [tf.get_variable(var_name) for var_name in variable_names]
                (W_xf, W_xi, W_xc, W_xo,
                 W_hf, W_hi, W_hc, W_ho,
                 W_cf, W_ci, W_co,
                 b_f, b_i, b_c, b_o) = block_vars

                # Input gate.
                i_pre = conv(inputs, W_xi) + recurrent_term(W_hi) + cell_term(W_ci, 'element_wise_multipy')
                i_t = tf.sigmoid(tf.nn.bias_add(i_pre, b_i))

                # Forget gate.
                f_pre = conv(inputs, W_xf) + recurrent_term(W_hf) + cell_term(W_cf, 'element_wise_multipy_ft')
                f_t = tf.sigmoid(tf.nn.bias_add(f_pre, b_f))

                # New cell state: retained part of the old cell plus the gated
                # candidate.
                if prev_cell is not None:
                    retained = tf.multiply(f_t, prev_cell, name='element_wise_multipy_ct1')
                else:
                    retained = tf.to_float(0)
                candidate = tf.tanh(
                    tf.nn.bias_add(conv(inputs, W_xc) + recurrent_term(W_hc), b_c))
                written = tf.multiply(i_t, candidate, name='element_wise_multipy_ct2')
                new_cell_state = retained + written

                # Output gate; its cell-state term uses the *new* cell state.
                o_pre = (conv(inputs, W_xo) + recurrent_term(W_ho) +
                         tf.multiply(new_cell_state, W_co, name='element_wise_multipy_ot'))
                o_t = tf.sigmoid(tf.nn.bias_add(o_pre, b_o))

                new_hidden_state = tf.multiply(o_t, tf.tanh(new_cell_state), name='element_wise_multipy_it')

                if var_list is not None:
                    for block_var in block_vars:
                        if block_var not in var_list:
                            var_list.append(block_var)

                return { 'hidden_state' : new_hidden_state, 'cell_state' : new_cell_state }
def train(inputs, epochs, batch_size=BATCH_SIZE):
    """Train the three-layer siamese GRU classifier on question pairs.

    Builds the graph, restores the latest checkpoint if one is found, then
    runs optimisation steps until `epochs` passes over the data are complete,
    checkpointing every 100 global steps and once more at the end.

    Args:
        inputs: dict with key 'embed' (name of a pickle file under EMBED_PATH
            mapping row index -> embedding vector) and key 'train_file' (CSV
            path read with pandas).
        epochs: number of passes over the training data still to run.
        batch_size: number of examples requested from the iterator per step.
    """
    global_step = tf.Variable(0,
                              dtype=tf.int32,
                              trainable=False,
                              name='global_step')

    print('Loading embeddings...')
    # NOTE: pickle.load can execute arbitrary code from the file; only load
    # embedding files that come from a trusted source.
    with open(EMBED_PATH + inputs['embed'], 'rb') as embed_file:
        map_index_vec = pickle.load(embed_file)
    print('Done.')

    # Dense embedding matrix; rows without an entry in the map stay zero.
    # NOTE(review): the embedding width is hard-coded to 50 -- confirm it
    # matches the pickled vectors.
    n_symbols = len(map_index_vec)
    ew = np.zeros((n_symbols, 50), dtype=np.float32)
    for index, vec in map_index_vec.items():
        ew[index, :] = vec

    with tf.name_scope("data"):
        in1 = tf.placeholder(tf.int32, shape=[None, MAX_STEP], name='in1')
        in2 = tf.placeholder(tf.int32, shape=[None, MAX_STEP], name='in2')
        target = tf.placeholder(tf.float32, shape=[None], name='target')
        tf.add_to_collection("in1", in1)
        tf.add_to_collection("in2", in2)

    with tf.name_scope("embedding"):
        embedding_weights = tf.Variable(initial_value=ew,
                                        name='embedding_weights')
        q1 = tf.nn.embedding_lookup(embedding_weights, in1, name='embed_q1')
        q2 = tf.nn.embedding_lookup(embedding_weights, in2, name='embed_q2')
        print('q2 :  {}'.format(q2))

    # Each GRU layer is applied to both questions with shared weights:
    # scope.reuse_variables() makes the second static_rnn reuse the first's.
    with tf.variable_scope('gru1') as scope:
        x1 = tf.unstack(q1, MAX_STEP, 1)
        x2 = tf.unstack(q2, MAX_STEP, 1)
        gru_cell = tf.contrib.rnn.GRUCell(N_HIDDEN_1)
        y11, _ = tf.contrib.rnn.static_rnn(gru_cell, x1, dtype=tf.float32)
        scope.reuse_variables()
        y12, _ = tf.contrib.rnn.static_rnn(gru_cell, x2, dtype=tf.float32)

    with tf.variable_scope('gru2') as scope:
        gru_cell = tf.contrib.rnn.GRUCell(N_HIDDEN_2)
        y21, _ = tf.contrib.rnn.static_rnn(gru_cell, y11, dtype=tf.float32)
        scope.reuse_variables()
        y22, _ = tf.contrib.rnn.static_rnn(gru_cell, y12, dtype=tf.float32)

    with tf.variable_scope('gru3') as scope:
        # Only the final state of the last layer is kept.
        gru_cell = tf.contrib.rnn.GRUCell(N_HIDDEN_3)
        _, y1 = tf.contrib.rnn.static_rnn(gru_cell, y21, dtype=tf.float32)
        scope.reuse_variables()
        _, y2 = tf.contrib.rnn.static_rnn(gru_cell, y22, dtype=tf.float32)

    with tf.variable_scope('process_state'):
        # Pair features: both final states, their squared difference and
        # their element-wise product.
        y_d = tf.squared_difference(y1, y2, name='h_sub_sq')
        y_cos = tf.reduce_prod(tf.stack(values=[y1, y2],
                                        axis=2,
                                        name='h_concat'),
                               reduction_indices=2,
                               name='h_dot')
        y = tf.concat(values=[y1, y2, y_d, y_cos], axis=1)
        print('y :  {}'.format(y))

    with tf.variable_scope('dense1') as scope:
        w = tf.Variable(tf.truncated_normal([4 * N_HIDDEN_3, N_HIDDEN_DENSE],
                                            stddev=0.1,
                                            dtype=tf.float32),
                        name='weights1')
        b = tf.Variable(tf.zeros([N_HIDDEN_DENSE], dtype=tf.float32),
                        name="bias1")

        hidden = tf.matmul(y, w) + b
        print('hidden :  {}'.format(hidden))

    with tf.variable_scope('dropout') as scope:
        keep_prob = tf.placeholder(tf.float32)
        hidden_dropout = tf.nn.dropout(hidden, keep_prob)

    with tf.variable_scope('dense2') as scope:
        w = tf.Variable(tf.truncated_normal([N_HIDDEN_DENSE, 1],
                                            stddev=0.1,
                                            dtype=tf.float32),
                        name='weights2')
        b = tf.Variable(tf.zeros([1], dtype=tf.float32), name="bias2")

        logits = tf.matmul(hidden_dropout, w) + b
        logits = tf.reshape(logits, [-1], name='logits')
        print('logits :  {}'.format(logits))

    with tf.name_scope('lr') as scope:
        cross_entropy = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=logits, labels=target))
        print('cross_entropy :  {}'.format(cross_entropy))

    optimizer = tf.train.AdamOptimizer(1e-4)
    train_step = optimizer.minimize(cross_entropy, global_step=global_step)

    prediction = tf.sigmoid(logits, name='prediction')
    tf.add_to_collection("prediction", prediction)
    correct_prediction = tf.equal(tf.round(prediction), target)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    with tf.name_scope("summaries"):
        tf.summary.scalar("loss", cross_entropy)
        tf.summary.scalar("accuracy", accuracy)
        tf.summary.histogram("histogram_loss", cross_entropy)
        summary_op = tf.summary.merge_all()

    with tf.Session() as sess:
        print('Starting session')
        sess.run(tf.global_variables_initializer())

        saver = tf.train.Saver()

        if not os.path.exists(CHECKPOINT_PATH):
            os.makedirs(CHECKPOINT_PATH)

        # NOTE(review): checkpoints are written into CHECKPOINT_PATH but looked
        # up in os.path.dirname(CHECKPOINT_PATH); these match only when
        # CHECKPOINT_PATH ends with a path separator -- confirm.
        ckpt = tf.train.get_checkpoint_state(os.path.dirname(CHECKPOINT_PATH))
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print(global_step.eval())

        print('1')
        writer = tf.summary.FileWriter(LOGDIR, sess.graph)
        print('2')

        # Close the summary writer even if training is interrupted.
        try:
            it = PaddedDataIterator(pd.read_csv(inputs['train_file']))
            print('3')
            int_step = 0
            while epochs > 0:
                batch, epoch_complete = it.next_batch(batch_size)

                if int_step % 100 == 0:
                    # NOTE(review): accuracy is measured with dropout still
                    # active (keep_prob=KEEP_PROB) -- confirm whether 1.0 was
                    # intended for evaluation.
                    train_accuracy = sess.run(accuracy,
                                              feed_dict={
                                                  in1: batch['vec1'],
                                                  target: batch['is_duplicate'],
                                                  in2: batch['vec2'],
                                                  keep_prob: KEEP_PROB
                                              })
                    print('Step %d: Training accuracy %g' %
                          (int_step, train_accuracy))
                    print("{} Saving checkpoint of model...".format(
                        datetime.now()))

                    # save checkpoint of the model
                    checkpoint_name = os.path.join(CHECKPOINT_PATH, 'model_step')
                    save_path = saver.save(sess,
                                           checkpoint_name,
                                           global_step=global_step)

                    print("{} Model checkpoint saved at {}".format(
                        datetime.now(), checkpoint_name))

                _, summary = sess.run(
                    [train_step, summary_op],
                    feed_dict={
                        in1: batch['vec1'],
                        in2: batch['vec2'],
                        target: batch['is_duplicate'],
                        keep_prob: KEEP_PROB
                    })
                writer.add_summary(summary, global_step=global_step.eval())

                # The periodic block above is keyed on the global step, which
                # also counts steps restored from a checkpoint.
                int_step = global_step.eval()
                epochs = epochs - epoch_complete
                if epoch_complete:
                    print('Epochs left =  {}'.format(epochs))
                sys.stdout.flush()

            checkpoint_name = os.path.join(CHECKPOINT_PATH, 'model_train.ckpt')
            save_path = saver.save(sess, checkpoint_name)
        finally:
            writer.close()
Example #36
0
    def __init__(self, W_embedding, settings):
        """Assemble the graph: inputs, embedding table, encoder, FC + batch-norm layer, sigmoid output and loss.

        Args:
            W_embedding: initial embedding matrix; its `.shape` sizes the
                embedding variable and its second dimension is stored as
                `embedding_size`.
            settings: object supplying the hyper-parameter attributes copied
                onto the instance below.
        """
        # Hyper-parameters copied verbatim from `settings`.
        for attr in ('model_name', 'fact_len', 'hidden_size', 'num_classes',
                     'filter_sizes', 'kernel_size', 'num_filters'):
            setattr(self, attr, getattr(settings, attr))
        self.n_filter_total = self.num_filters * len(self.filter_sizes)
        for attr in ('fc_hidden_size', 'attn_mode', 'seq_encoder', 'dropout',
                     'out_caps_num', 'rout_iter'):
            setattr(self, attr, getattr(settings, attr))

        self.initializer = tf.contrib.layers.xavier_initializer()
        self._global_step = tf.Variable(0, trainable=False, name='Global_Step')
        self.update_emas = list()
        self._tst = tf.placeholder(tf.bool)
        self._batch_size = tf.placeholder(tf.int32, [])
        self.is_train = tf.placeholder(dtype=tf.bool, name='is_train')

        with tf.name_scope('Inputs'):
            self._X_inputs = tf.placeholder(tf.int32, [None, 16, 16],
                                            name='X_input')
            self.sNum = tf.placeholder(shape=(None, ),
                                       dtype=tf.int32,
                                       name='ph_sNum')
            # shape(b_sz, sNum) [[6, 3, 5, 3], [22, 44, 22],] number of words
            # in each sentence
            self.wNum = tf.placeholder(shape=(None, 16),
                                       dtype=tf.int32,
                                       name='ph_wNum')
            self._y_inputs = tf.placeholder(tf.float32,
                                            [None, self.num_classes],
                                            name='y_input')

        with tf.variable_scope('embedding'):
            # Trainable embedding table initialised from the supplied matrix.
            self.embedding = tf.get_variable(
                name='embedding',
                shape=W_embedding.shape,
                initializer=tf.constant_initializer(W_embedding),
                trainable=True)
        self.embedding_size = W_embedding.shape[1]

        with tf.variable_scope('Atten_TextCNN'):
            output = self._inference()
            print('output ', output)
            # The encoder output must be rank 2; only its width is needed.
            _, feature_width = output.get_shape().as_list()

        with tf.variable_scope('fc-bn-layer'):
            fc_weights = self.weight_variable(
                [feature_width, self.fc_hidden_size], name='Weight_fc')
            tf.summary.histogram('W_fc', fc_weights)
            fc_pre_activation = tf.matmul(output, fc_weights, name='h_fc')
            fc_offset = tf.Variable(
                tf.constant(0.1,
                            tf.float32,
                            shape=[self.fc_hidden_size],
                            name="beta_fc"))
            tf.summary.histogram('beta_fc', fc_offset)
            normalised_fc, ema_update = self.batchnorm(fc_pre_activation,
                                                       fc_offset,
                                                       convolutional=False)
            self.update_emas.append(ema_update)
            self.fc_bn_relu = tf.nn.relu(normalised_fc, name="relu")

        with tf.variable_scope('out_layer'):
            out_weights = self.weight_variable(
                [self.fc_hidden_size, self.num_classes], name='Weight_out')
            tf.summary.histogram('Weight_out', out_weights)
            out_bias = self.bias_variable([self.num_classes], name='bias_out')
            tf.summary.histogram('bias_out', out_bias)
            # One score per class.
            self.logits = tf.nn.xw_plus_b(self.fc_bn_relu,
                                          out_weights,
                                          out_bias,
                                          name='y_pred')
            self._y_pred = tf.sigmoid(self.logits)

        with tf.name_scope('loss'):
            per_entry_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                logits=self.logits, labels=self._y_inputs)
            self._loss = tf.reduce_mean(per_entry_loss)
            tf.summary.scalar('loss', self._loss)

        self.saver = tf.train.Saver(max_to_keep=1)
 def sigmoid_network(self, x):
     """Return the element-wise sigmoid of `x` as an op named "z1"."""
     return tf.sigmoid(x, name="z1")
Example #38
0
    def __init__(self, is_training, config):
        """Build the bit-quantised recurrent language-model graph.

        Args:
            is_training: when true, dropout is applied (if
                `config.keep_prob < 1`) and the training ops are created;
                otherwise construction stops after the cost and final state.
            config: object providing batch_size, num_steps, hidden_size,
                vocab_size, w_bit, f_bit, keep_prob, num_layers,
                max_grad_norm and optionally cell_type ('gru' or 'lstm';
                'gru' is used when the attribute is absent).

        Raises:
            ValueError: if `config.cell_type` is neither 'gru' nor 'lstm'.
        """
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        size = config.hidden_size
        vocab_size = config.vocab_size

        self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
        self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])

        # An unrecognised cell type used to fall through both branches and
        # fail later with an unbound `cell`; reject it explicitly instead.
        cell_type = getattr(config, 'cell_type', 'gru')
        if cell_type == 'gru':
            cell = BitGRUCell(size, w_bit=config.w_bit, f_bit=config.f_bit)
        elif cell_type == 'lstm':
            cell = BitLSTMCell(size, w_bit=config.w_bit, f_bit=config.f_bit)
        else:
            raise ValueError(
                "Unsupported cell_type %r; expected 'gru' or 'lstm'" %
                (cell_type,))
        if is_training and config.keep_prob < 1:
            cell = tf.nn.rnn_cell.DropoutWrapper(
                cell, output_keep_prob=config.keep_prob)
        cell = tf.nn.rnn_cell.MultiRNNCell([cell] * config.num_layers,
                                           state_is_tuple=False)

        # The zero state is passed through a sigmoid and rounded to f_bit bits
        # before use.
        self._initial_state = cell.zero_state(batch_size, tf.float32)
        self._initial_state = bit_utils.round_bit(tf.sigmoid(
            self._initial_state),
                                                  bit=config.f_bit)

        embedding = tf.get_variable(
            "embedding", [vocab_size, size],
            initializer=tf.random_uniform_initializer())
        inputs = tf.nn.embedding_lookup(embedding, self._input_data)

        # Embedded inputs go through a ReLU and are rounded to f_bit bits.
        inputs = bit_utils.round_bit(tf.nn.relu(inputs), bit=config.f_bit)

        if is_training and config.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, config.keep_prob)

        # Split along the time axis into one [batch_size, size] tensor per step.
        inputs = [
            tf.squeeze(input_, [1])
            for input_ in tf.split(1, num_steps, inputs)
        ]
        outputs, state = tf.nn.rnn(cell,
                                   inputs,
                                   initial_state=self._initial_state)

        output = tf.reshape(tf.concat(1, outputs), [-1, size])
        # Only softmax_w is created under the replace_variable context, which
        # is given a tanh-then-quantize (w_bit) transform; softmax_b is not.
        with bit_utils.replace_variable(
                lambda x: bit_utils.quantize_w(tf.tanh(x), bit=config.w_bit)):
            softmax_w = tf.get_variable("softmax_w", [size, vocab_size])
        softmax_b = tf.get_variable("softmax_b", [vocab_size])
        logits = tf.matmul(output, softmax_w) + softmax_b
        loss = tf.nn.seq2seq.sequence_loss_by_example(
            [logits], [tf.reshape(self._targets, [-1])],
            [tf.ones([batch_size * num_steps])])
        self._cost = cost = tf.reduce_sum(loss) / batch_size
        self._final_state = state

        if not is_training:
            return

        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          config.max_grad_norm)
        # `self.lr` is not defined in this method; presumably a property on
        # the class that exposes `self._lr` -- TODO confirm.
        optimizer = tf.train.AdamOptimizer(self.lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))
import tensorflow
import numpy

# Load the CSV table as float32: every column but the last is a feature, the
# last column is the label.
xy = numpy.loadtxt('data-03-diabetes.csv', delimiter=',', dtype=numpy.float32)
print("==========================")
print(xy)
x_data, y_data = xy[:, 0:-1], xy[:, [-1]]

# Graph inputs: eight features per row and one label per row.
X = tensorflow.placeholder(tensorflow.float32, shape=[None, 8])
Y = tensorflow.placeholder(tensorflow.float32, shape=[None, 1])

W = tensorflow.Variable(tensorflow.random_normal([8, 1]), name='weight')
b = tensorflow.Variable(tensorflow.random_normal([1]), name='bias')

# Logistic model and its binary cross-entropy cost.
hypothesis = tensorflow.sigmoid(tensorflow.matmul(X, W) + b)
positive_term = Y * tensorflow.log(hypothesis)
negative_term = (1 - Y) * tensorflow.log(1 - hypothesis)
cost = -tensorflow.reduce_mean(positive_term + negative_term)

optimizer = tensorflow.train.GradientDescentOptimizer(learning_rate=0.01)
train = optimizer.minimize(cost)

# A row counts as correct when the thresholded hypothesis equals its label.
predicted = tensorflow.cast(hypothesis > 0.5, dtype=tensorflow.float32)
matches = tensorflow.equal(predicted, Y)
accuracy = tensorflow.reduce_mean(
    tensorflow.cast(matches, dtype=tensorflow.float32))

with tensorflow.Session() as session:
    session.run(tensorflow.global_variables_initializer())

    # Full-batch gradient descent: the whole table is fed on every step.
    feed = {X: x_data, Y: y_data}
    for step in range(10001):
        session.run(train, feed_dict=feed)
	def __call__(self, inputs, state, timestep = 0):
		"""Compute one step of the normalised GRU cell.

		Args:
			inputs: input tensor, multiplied by the [num_inputs, ...] weight matrices.
			state: pair whose second element is the previous hidden state; the
				first element is ignored.
			timestep: not read by this method.

		Returns:
			`(h_t, (h_t, h_t))`: the new hidden state as the output, plus a pair
			holding it twice as the next state.
		"""
		def scaled_norm(pre_act, gain, shift):
			# Normalise with self.normalize_acts, then apply the learned gain and shift.
			return gain * self.normalize_acts(pre_act) + shift

		gate_width = self._num_units * 2

		with tf.variable_scope("LN_GRU"):
			_, prev_h = state
			gate_bias = tf.get_variable("concat_bias", [gate_width], tf.float32, tf.constant_initializer(0.0))
			if self._MI:
				mi_alpha = tf.get_variable("MI_alpha", [gate_width], tf.float32, tf.constant_initializer(1.0))

			# Input contribution to both gates.
			with tf.variable_scope("input_weight_matrix"):
				W_x = tf.get_variable("W_x", [self._num_inputs, gate_width], tf.float32, tf.contrib.layers.variance_scaling_initializer())
				gamma_wx = tf.get_variable("gamma_wx", [gate_width], tf.float32, tf.constant_initializer(1.0))
				beta_wx = tf.get_variable("beta_wx", [gate_width], tf.float32, tf.constant_initializer(0.0))
				if self._MI:
					mi_beta_x = tf.get_variable("MI_beta_x", [gate_width], tf.float32, tf.constant_initializer(0.5))

				x_gates = scaled_norm(tf.matmul(inputs, W_x), gamma_wx, beta_wx)

			# Hidden-state contribution to both gates.
			with tf.variable_scope("hidden_hidden_matrix"):
				W_h = tf.get_variable("W_h", [self._num_units, gate_width], tf.float32, tf.orthogonal_initializer())
				gamma_wh = tf.get_variable("gamma_wh", [gate_width], tf.float32, tf.constant_initializer(1.0))
				beta_wh = tf.get_variable("beta_wh", [gate_width], tf.float32, tf.constant_initializer(0.0))
				if self._MI:
					mi_beta_h = tf.get_variable("MI_beta_h", [gate_width], tf.float32, tf.constant_initializer(0.5))

				h_gates = scaled_norm(tf.matmul(prev_h, W_h), gamma_wh, beta_wh)

			# With self._MI set, the two contributions are also combined through
			# an element-wise product term.
			if self._MI:
				gate_pre = mi_alpha * h_gates * x_gates + mi_beta_x * x_gates + mi_beta_h * h_gates + gate_bias
			else:
				gate_pre = h_gates + x_gates + gate_bias

			z_pre, r_pre = tf.split(gate_pre, num_or_size_splits = 2, axis = 1)
			z_gate = tf.sigmoid(z_pre + self._forget_bias)

			with tf.variable_scope("candidate_matrix"):
				U_h = tf.get_variable("U_h", [self._num_units, self._num_units], tf.float32, tf.orthogonal_initializer())
				U_x = tf.get_variable("U_x", [self._num_inputs, self._num_units], tf.float32, tf.contrib.layers.variance_scaling_initializer())

				gamma_uh = tf.get_variable("gamma_uh", [self._num_units], tf.float32, tf.constant_initializer(1.0))
				beta_uh = tf.get_variable("beta_uh", [self._num_units], tf.float32, tf.constant_initializer(0.0))
				gamma_ux = tf.get_variable("gamma_ux", [self._num_units], tf.float32, tf.constant_initializer(1.0))
				beta_ux = tf.get_variable("beta_ux", [self._num_units], tf.float32, tf.constant_initializer(0.0))
				if self._MI:
					mi_alpha_hat = tf.get_variable("mi_alpha_hat", [self._num_units], tf.float32, tf.constant_initializer(1.0))
					mi_beta_hat_h = tf.get_variable("mi_beta_hat_h", [self._num_units], tf.float32, tf.constant_initializer(0.5))
					mi_beta_hat_x = tf.get_variable("mi_beta_hat_x", [self._num_units], tf.float32, tf.constant_initializer(0.5))
					mi_beta_hat = tf.get_variable("mi_beta_hat", [self._num_units], tf.float32, tf.constant_initializer(0.0))

				x_cand = scaled_norm(tf.matmul(inputs, U_x), gamma_ux, beta_ux)
				h_cand = scaled_norm(tf.matmul(prev_h, U_h), gamma_uh, beta_uh)

				# The second half of the gate split, after a sigmoid, scales the
				# hidden-state part of the candidate.
				gated_h = tf.sigmoid(r_pre) * h_cand

				if self._MI:
					proposal = self._activation(mi_alpha_hat * x_cand * gated_h + mi_beta_hat_h * gated_h + mi_beta_hat_x * x_cand + mi_beta_hat)
				else:
					proposal = self._activation(x_cand + gated_h)

				# z_gate weights the previous state; its complement weights the proposal.
				h_t = z_gate * prev_h + (1. - z_gate) * proposal

			return h_t, (h_t, h_t)
def main(_):
  """Train a one-hidden-layer network, checkpoint it and plot the training loss.

  The network maps `num_crops` input features through a sigmoid hidden layer
  of LAYER_SIZE units to `num_crops * num_plots` logits. The logits are
  treated as `num_plots` consecutive groups of `num_crops` columns; every
  group gets its own softmax cross-entropy term and the terms are averaged.

  The learning rate is decayed once per epoch
  (LEARNING_RATE * DECAY ** epoch). After training a checkpoint is written
  under "save/", the per-plot softmax outputs for a constant probe input
  are printed, and the per-batch loss curve is shown with matplotlib.

  Args:
    _: unused (argument vector passed by the app runner).
  """
  import os  # local import: only needed to create the checkpoint directory

  num_crops = FLAGS.num_crops
  num_plots = FLAGS.num_plots
  num_outputs = num_crops * num_plots

  # Create the model.
  # NOTE(review): both weight matrices start at zero, so every hidden unit
  # receives identical gradients -- confirm this initialisation is intended.
  x = tf.placeholder(tf.float32, [None, num_crops])
  W1 = tf.Variable(tf.zeros([num_crops, LAYER_SIZE]))
  b1 = tf.Variable(tf.zeros([LAYER_SIZE]))
  y1 = tf.sigmoid(tf.matmul(x, W1) + b1)
  W3 = tf.Variable(tf.zeros([LAYER_SIZE, num_outputs]))
  b3 = tf.Variable(tf.zeros([num_outputs]))
  y = tf.matmul(y1, W3) + b3

  # One softmax per plot, each over that plot's `num_crops` logits.
  probabilities = []
  for i in range(0, num_plots):
    probabilities.append(tf.nn.softmax(y[:, i*num_crops:(i+1)*num_crops]))

  y_ = tf.placeholder(tf.float32, [None, num_outputs])

  # Average of the per-plot cross-entropies. Plain operators are used instead
  # of tf.mul/tf.add so the graph also builds on TensorFlow >= 1.0, where
  # tf.mul no longer exists.
  cross_entropy_total = tf.reduce_mean(
      tf.nn.softmax_cross_entropy_with_logits(
          labels=y_[:, 0:num_crops],
          logits=y[:, 0:num_crops]))
  for i in range(1, num_plots):
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            labels=y_[:, i*num_crops:(i+1)*num_crops],
            logits=y[:, i*num_crops:(i+1)*num_crops]))
    cross_entropy_total = cross_entropy + cross_entropy_total
  cross_entropy_total = cross_entropy_total * (1.0/num_plots)

  lr = tf.Variable(0.0, trainable=False)
  train_step = tf.train.AdamOptimizer(lr).minimize(cross_entropy_total)

  # Build the learning-rate update and the evaluation ops ONCE; creating them
  # inside the epoch loop would add new nodes to the graph on every epoch.
  lr_value = tf.placeholder(tf.float32, [])
  set_lr = tf.assign(lr, lr_value)
  # NOTE(review): argmax runs over all num_crops*num_plots columns at once,
  # not per plot -- confirm this is the accuracy definition that is wanted.
  correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
  accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

  # Train
  probability = []
  points = []
  with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    saver = tf.train.Saver(tf.all_variables())
    inputs, outputs = load_data(FLAGS.data_dir, FLAGS.num_crops, 'data_train.csv', num_plots)
    eval_inputs, eval_outputs = load_data(FLAGS.data_dir, FLAGS.num_crops, 'data_eval.csv', num_plots)
    # Samples beyond NUM_BATCHES * batch_size are never visited.
    batch_size = int(len(inputs)/float(NUM_BATCHES))
    for i in range(EPOCHS):
      sess.run(set_lr, feed_dict={lr_value: LEARNING_RATE * (DECAY ** i)})
      for j in range(NUM_BATCHES):
        batch_xs = inputs[j*batch_size:(j+1)*batch_size, :]
        batch_ys = outputs[j*batch_size:(j+1)*batch_size, :]
        _, loss = sess.run([train_step, cross_entropy_total], feed_dict={x: batch_xs, y_: batch_ys})
        points.append([i*NUM_BATCHES + j, loss])
      acc = sess.run(accuracy, feed_dict={x: eval_inputs, y_: eval_outputs})
      # Single-argument print(...) behaves the same on Python 2 and 3.
      print("Epoch " + str(i) + " has loss " + str(loss))
      print("Epoch " + str(i) + " has eval accuracy " + str(acc))
    # Saver.save does not create the parent directory of the checkpoint.
    if not os.path.isdir("save"):
      os.makedirs("save")
    saver.save(sess, "save/model.ckpt", global_step=max(EPOCHS - 1, 0))
    # Probe the trained model with a constant input sized to the model.
    probability = sess.run(probabilities, feed_dict={x: [[3.2] * num_crops]})
  for plot_probs in probability:
    print(",".join(map(str, plot_probs.tolist()[0])))
  points = np.array(points)
  plt.plot(points[:, 0], points[:, 1], linewidth=2.0)
  plt.show()
Example #42
0
 def __init__(self, logits):
     """Keep the raw logits and their element-wise sigmoid.

     Args:
       logits: tensor of unnormalised scores; stored as `self.logits`.
         Its sigmoid is stored as `self.ps`.
     """
     self.logits = logits
     sigmoid_of_logits = tf.sigmoid(logits)
     self.ps = sigmoid_of_logits
Example #43
0
    def __init__(self,
                 sequence_length,
                 num_classes_list,
                 total_classes,
                 vocab_size,
                 fc_hidden_size,
                 embedding_size,
                 embedding_type,
                 l2_reg_lambda=0.0,
                 pretrained_embedding=None):
        """Build a three-level hierarchical multi-label classification graph.

        The token embeddings of a sentence are averaged; each level then runs
        a ReLU hidden layer followed by a sigmoid output layer. Levels two
        and three take the previous level's sigmoid scores concatenated with
        the averaged embedding as input. The loss is the sum of the three
        per-level sigmoid cross-entropies plus an L2 penalty over all
        trainable variables.

        Args:
            sequence_length: number of token ids per example in `input_x`.
            num_classes_list: class counts of the first, second and third
                level (indices 0, 1 and 2 are read).
            total_classes: width of the `input_y` placeholder.
            vocab_size: number of rows of the embedding matrix.
            fc_hidden_size: width of every hidden layer.
            embedding_size: dimension of a word vector.
            embedding_type: only read when `pretrained_embedding` is given;
                0 uses it as a constant, 1 as a trainable variable.
            l2_reg_lambda: multiplier of the L2 penalty.
            pretrained_embedding: optional initial embedding matrix; when
                None a uniform random trainable embedding in [-1, 1) is used.

        Raises:
            ValueError: if `pretrained_embedding` is given and
                `embedding_type` is neither 0 nor 1. Previously this case
                left `self.embedding` unset and failed later with an
                AttributeError.
        """
        if pretrained_embedding is not None and embedding_type not in (0, 1):
            raise ValueError(
                "embedding_type must be 0 (constant) or 1 (trainable) when "
                "pretrained_embedding is given, got %r" % (embedding_type,))

        def fc_params(input_size, output_size):
            """Create the `W` and `b` variables of one dense layer in the active name scope."""
            weights = tf.Variable(tf.truncated_normal(
                shape=[input_size, output_size],
                stddev=0.1,
                dtype=tf.float32),
                                  name="W")
            bias = tf.Variable(tf.constant(value=0.1,
                                           shape=[output_size],
                                           dtype=tf.float32),
                               name="b")
            return weights, bias

        # Placeholders for input, output, dropout_prob and training_tag.
        # input_y, dropout_keep_prob and is_training are created here but not
        # consumed by any op built in this constructor.
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length],
                                      name="input_x")
        self.input_y_first = tf.placeholder(tf.float32,
                                            [None, num_classes_list[0]],
                                            name="input_y_first")
        self.input_y_second = tf.placeholder(tf.float32,
                                             [None, num_classes_list[1]],
                                             name="input_y_second")
        self.input_y_third = tf.placeholder(tf.float32,
                                            [None, num_classes_list[2]],
                                            name="input_y_third")
        self.input_y = tf.placeholder(tf.float32, [None, total_classes],
                                      name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name="dropout_keep_prob")
        self.is_training = tf.placeholder(tf.bool, name="is_training")

        self.global_step = tf.Variable(0, trainable=False, name="Global_Step")

        # Embedding Layer
        with tf.device("/cpu:0"), tf.name_scope("embedding"):
            if pretrained_embedding is None:
                # Randomly initialised, trainable word vectors by default.
                self.embedding = tf.Variable(tf.random_uniform(
                    [vocab_size, embedding_size],
                    minval=-1.0,
                    maxval=1.0,
                    dtype=tf.float32),
                                             trainable=True,
                                             name="embedding")
            elif embedding_type == 0:
                # Frozen pretrained vectors.
                self.embedding = tf.constant(pretrained_embedding,
                                             dtype=tf.float32,
                                             name="embedding")
            else:
                # embedding_type == 1: fine-tuned pretrained vectors.
                self.embedding = tf.Variable(pretrained_embedding,
                                             trainable=True,
                                             dtype=tf.float32,
                                             name="embedding")
            self.embedded_sentence = tf.nn.embedding_lookup(
                self.embedding, self.input_x)

            # Average Vectors
            # [batch_size, embedding_size]
            self.embedded_sentence_average = tf.reduce_mean(
                self.embedded_sentence, axis=1)

        # First Level
        with tf.name_scope("first-fc"):
            W, b = fc_params(embedding_size, fc_hidden_size)
            self.first_fc = tf.nn.xw_plus_b(self.embedded_sentence_average, W,
                                            b)
            self.first_fc_out = tf.nn.relu(self.first_fc, name="relu")

        with tf.name_scope("first-output"):
            W, b = fc_params(fc_hidden_size, num_classes_list[0])
            self.first_logits = tf.nn.xw_plus_b(self.first_fc_out,
                                                W,
                                                b,
                                                name="logits")
            self.first_scores = tf.sigmoid(self.first_logits, name="scores")

        # Second Level: first-level scores + averaged embedding as input.
        with tf.name_scope("second-fc"):
            self.second_input = tf.concat(
                [self.first_scores, self.embedded_sentence_average], axis=1)
            W, b = fc_params(num_classes_list[0] + embedding_size,
                             fc_hidden_size)
            self.second_fc = tf.nn.xw_plus_b(self.second_input, W, b)
            self.second_fc_out = tf.nn.relu(self.second_fc, name="relu")

        with tf.name_scope("second-output"):
            W, b = fc_params(fc_hidden_size, num_classes_list[1])
            self.second_logits = tf.nn.xw_plus_b(self.second_fc_out,
                                                 W,
                                                 b,
                                                 name="logits")
            self.second_scores = tf.sigmoid(self.second_logits, name="scores")

        # Third Level: second-level scores + averaged embedding as input.
        with tf.name_scope("third-fc"):
            self.third_input = tf.concat(
                [self.second_scores, self.embedded_sentence_average], axis=1)
            W, b = fc_params(num_classes_list[1] + embedding_size,
                             fc_hidden_size)
            self.third_fc = tf.nn.xw_plus_b(self.third_input, W, b)
            self.third_fc_out = tf.nn.relu(self.third_fc, name="relu")

        with tf.name_scope("third-output"):
            W, b = fc_params(fc_hidden_size, num_classes_list[2])
            self.third_logits = tf.nn.xw_plus_b(self.third_fc_out,
                                                W,
                                                b,
                                                name="logits")
            self.third_scores = tf.sigmoid(self.third_logits, name="scores")

        # Final scores: the three levels' scores side by side.
        with tf.name_scope("output"):
            self.scores = tf.concat(
                [self.first_scores, self.second_scores, self.third_scores],
                axis=1,
                name="scores")

        # Calculate mean cross-entropy loss, L2 loss
        with tf.name_scope("loss"):

            def cal_loss(labels, logits, name):
                """Sigmoid cross-entropy summed over classes, averaged over the batch."""
                losses = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels,
                                                                 logits=logits)
                losses = tf.reduce_mean(tf.reduce_sum(losses, axis=1),
                                        name=name + "sigmoid_losses")
                return losses

            # Loss
            losses_1 = cal_loss(labels=self.input_y_first,
                                logits=self.first_logits,
                                name="first_")
            losses_2 = cal_loss(labels=self.input_y_second,
                                logits=self.second_logits,
                                name="second_")
            losses_3 = cal_loss(labels=self.input_y_third,
                                logits=self.third_logits,
                                name="third_")
            losses = tf.add_n([losses_1, losses_2, losses_3], name="losses")

            # L2 Loss over every trainable variable in the graph.
            l2_losses = tf.add_n([
                tf.nn.l2_loss(tf.cast(v, tf.float32))
                for v in tf.trainable_variables()
            ],
                                 name="l2_losses") * l2_reg_lambda
            self.loss = tf.add_n([losses, l2_losses], name="loss")
Example #44
0
        output = tf.layers.dense(output, 32)
        logits = tf.layers.dense(output, 1)

    # Compute the loss: mean sigmoid cross-entropy between `labels_` and `logits`.
    with tf.variable_scope("loss"):
        loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_, logits=logits))

        # Choose the optimizer
        with tf.variable_scope("train_step"):
            global_step = tf.Variable(0, name="global_step", trainable=False)
            global_add = global_step.assign_add(1)  # used for counting
            # NOTE(review): global_step is not passed to minimize(), so it only
            # advances when `global_add` is run explicitly -- confirm callers do so.
            train_op = tf.train.AdamOptimizer(params.lr).minimize(loss)

        # Accuracy / F1 / precision / recall computation
        with tf.variable_scope("evaluation"):
            # A sigmoid output above 0.26 is predicted as the positive class.
            pred = tf.cast(tf.greater(tf.sigmoid(logits), 0.26), tf.float32)
            accuracy = tf.reduce_mean(tf.cast(tf.equal(pred, labels_), tf.float32), name="accuracy")
            # Confusion matrix
            # _|0 |1 |
            # 0|2 |3 |
            # 1|2 |3 |

            # Flatten labels and predictions to 1-D before building the matrix.
            true = tf.reshape(labels_, (-1,))
            pred = tf.reshape(pred, (-1,))

            # Added to the F1 denominator to avoid dividing by zero.
            epsilon = 1e-7
            cm = tf.contrib.metrics.confusion_matrix(true, pred, num_classes=2)

            # cm[1][1] divided by the column-1 sum (precision) and by the row-1 sum (recall).
            precision = tf.cast(cm[1][1] / tf.reduce_sum(cm[:, 1]), tf.float32, name="precision")
            recall = tf.cast(cm[1][1] / tf.reduce_sum(cm[1], axis=0), tf.float32, name="recall")
            f1_score = tf.cast((2 * precision * recall / (precision + recall + epsilon)), tf.float32, name="f1_score")
def main():
    """Train the GAN, logging metrics and periodically exporting models and music.

    Positional command-line arguments (read from `sys.argv`):
        1. directory containing the music data set;
        2. parent directory for the timestamped checkpoint folder;
        3. parent directory for the timestamped generated-song folder.

    The generator is optimised on every step and the discriminator on every
    step where `t % G_D_ASPECT_RATIO == 0`. One line of metrics per step is
    appended to `Model.csv` in the checkpoint folder. At the first step of
    every epoch and at the final step a checkpoint is saved and the current
    generator output is binarised and exported with `convert_to_npz`.
    """
    tf.reset_default_graph()

    print("\n\n")
    print("Defining placeholders...")

    input_genre = tf.placeholder(dtype=tf.int32, shape=GENERATOR_BATCH_SIZE)
    latent_vector = tf.placeholder(dtype=tf.float32,
                                   shape=[GENERATOR_BATCH_SIZE, LATENT_SIZE])
    real_data = tf.placeholder(dtype=tf.bool,
                               shape=[
                                   REAL_DATA_BATCH_SIZE, NUM_BARS,
                                   BEATS_PER_BAR, NUM_NOTES, NUM_TRACKS
                               ])
    # From here on `real_data` names the float cast; that cast tensor is what
    # the feed dicts below feed.
    real_data = tf.cast(real_data, tf.float32)
    discriminator_labels_real = tf.placeholder(dtype=tf.float32,
                                               shape=REAL_DATA_BATCH_SIZE)
    discriminator_labels_fake = tf.placeholder(dtype=tf.float32,
                                               shape=GENERATOR_BATCH_SIZE)

    print("Constructing Model...")
    generator_out = Generator(input_genre, latent_vector, LATENT_SIZE,
                              NUM_TRACKS, NUM_CLASSES)

    # The discriminator is applied twice with shared weights.
    with tf.variable_scope('Discriminator') as scope:
        fake_out = Discriminator(generator_out, NUM_TRACKS)
        scope.reuse_variables()
        real_out = Discriminator(real_data, NUM_TRACKS)

    classifier_out, _, _, _ = Classifier(generator_out, NUM_TRACKS,
                                         NUM_CLASSES)

    print("\n\n")
    print("Generator out: ", generator_out)
    print("Fake out: ", fake_out)
    print("Real out: ", real_out)
    print("Classifier out: ", classifier_out)
    print("\n\n")

    # Mean squared distance of the generator output from 0.5.
    variance = tf.reduce_mean(tf.square(tf.subtract(generator_out, 0.5)),
                              [0, 1, 2, 3, 4],
                              name=None,
                              keep_dims=False)

    classifier_labels = tf.one_hot(input_genre, NUM_CLASSES)

    # Streaming accuracy metrics; their local variables are initialised by
    # the local_variables_initializer below.
    classifier_accuracy, acc_op = tf.metrics.accuracy(
        input_genre, tf.argmax(classifier_out, 1))
    discriminator_accuracy, acc_disc = tf.metrics.accuracy(
        tf.concat([
            tf.ones(fake_out.get_shape(), dtype=tf.int32),
            tf.zeros(real_out.get_shape(), dtype=tf.int32)
        ],
                  axis=-1),
        tf.round(tf.sigmoid(tf.concat([fake_out, real_out], axis=-1))))

    # Loss functions
    generator_loss, discriminator_loss = VAC_GAN_loss(
        fake_out, real_out, classifier_out, classifier_labels,
        discriminator_labels_real, discriminator_labels_fake, variance)

    # Each optimiser only updates the variables whose name contains its tag.
    # NOTE(review): the match is case-sensitive while the scope above is
    # 'Discriminator' -- confirm the discriminator layers carry a lowercase
    # "discriminator" in their own names.
    generator_varlist = list(
        filter(lambda a: "generator" in a.name,
               [v for v in tf.trainable_variables()]))
    discriminator_varlist = list(
        filter(lambda a: "discriminator" in a.name,
               [v for v in tf.trainable_variables()]))

    generator_optim = tf.train.AdamOptimizer(
        learning_rate=GENERATOR_LEARNING_RATE,
        beta1=BETA_1,
        beta2=BETA_2,
        epsilon=1e-08,
        use_locking=False,
        name='Generator_Optimizer').minimize(generator_loss,
                                             var_list=generator_varlist)
    discriminator_optim = tf.train.AdamOptimizer(
        learning_rate=DISCRIMINATOR_LEARNING_RATE,
        beta1=BETA_1,
        beta2=BETA_2,
        epsilon=1e-08,
        use_locking=False,
        name='Discriminator_Optimizer').minimize(
            discriminator_loss, var_list=discriminator_varlist)

    # Initialise variables
    print("Initialising session...")
    init_g = tf.global_variables_initializer()
    init_l = tf.local_variables_initializer()
    sess = tf.Session()
    sess.run(init_g)
    sess.run(init_l)

    saver = tf.train.Saver()

    def optimistic_restore(session, save_file, graph=tf.get_default_graph()):
        """Restore only the variables found in `save_file` with a matching shape."""
        reader = tf.train.NewCheckpointReader(save_file)
        saved_shapes = reader.get_variable_to_shape_map()
        var_names = sorted([(var.name, var.name.split(':')[0])
                            for var in tf.global_variables()
                            if var.name.split(':')[0] in saved_shapes])
        restore_vars = []
        for var_name, saved_var_name in var_names:
            curr_var = graph.get_tensor_by_name(var_name)
            var_shape = curr_var.get_shape().as_list()
            if var_shape == saved_shapes[saved_var_name]:
                restore_vars.append(curr_var)
        opt_saver = tf.train.Saver(restore_vars)
        opt_saver.restore(session, save_file)

    # Load the pre-trained classifier weights.
    checkpoint_path = r'/home/cofphe/Documents/jacob-luka/Models/saved_models_2018-12-13_00:29:45'
    optimistic_restore(sess, tf.train.latest_checkpoint(checkpoint_path))

    data_path = abspath(sys.argv[1])
    print("Loading in Data from: ", data_path)
    data = Data(data_path)  # Path to directory containing music set

    models_directory = join(
        sys.argv[2],
        ("saved_models_" + datetime.now().strftime('%Y-%m-%d_%H:%M:%S')))
    os.makedirs(models_directory, exist_ok=True)

    songs_directory = join(
        sys.argv[3],
        ("generated_song_" + datetime.now().strftime('%Y-%m-%d_%H:%M:%S')))
    os.makedirs(songs_directory, exist_ok=True)

    # Kept from the original: one draw from each source before training, in
    # case the Data object advances internal state on every call.
    genre_batch = data.get_genre()
    data_batch = data.get_batch()
    latent_batch = data.get_noise()

    BATCHES_PER_EPOCH = int(data.num_examples / REAL_DATA_BATCH_SIZE)
    total_steps = BATCHES_PER_EPOCH * GAN_EPOCHS

    # Built once: creating this op inside the loop would add new nodes to
    # the graph every time music is exported.
    binarized_out = tf.cast(tf.round(generator_out), tf.bool)

    genre_names = {0: 'alternative', 1: 'rock', 2: 'classic'}

    def export_songs(generated_music, generated_genre, step):
        """Write every phrase of a generated batch to `songs_directory`."""
        epoch_tag = str(int(step / BATCHES_PER_EPOCH))
        for jj in range(GENERATOR_BATCH_SIZE):
            genre_id = int(generated_genre[jj])
            # Ids outside the table get a numeric tag instead of silently
            # reusing the previous phrase's genre name.
            genre_name = genre_names.get(genre_id, 'genre' + str(genre_id))
            generated_phrase = generated_music[jj, :, :, :, :]
            convert_to_npz(generated_phrase, songs_directory,
                           (str(jj) + '_' + genre_name + '_EPOCH_' +
                            epoch_tag))

    progress = trange(total_steps, desc='Bar_desc', leave=True)

    for t in progress:
        genre_batch = data.get_genre()
        data_batch = data.get_batch()
        latent_batch = data.get_noise()
        discriminator_labels_real_batch, discriminator_labels_fake_batch = data.get_labels(
        )
        feed = {
            input_genre: genre_batch,
            latent_vector: latent_batch,
            real_data: data_batch,
            discriminator_labels_real: discriminator_labels_real_batch,
            discriminator_labels_fake: discriminator_labels_fake_batch
        }
        if t % G_D_ASPECT_RATIO == 0:
            # Joint step: update generator and discriminator.
            (loss_generator, _, loss_discriminator, _, class_acc, disc_acc,
             variance_batch) = sess.run([
                 generator_loss, generator_optim, discriminator_loss,
                 discriminator_optim, acc_op, acc_disc, variance
             ],
                                        feed_dict=feed)
        else:
            # Generator-only step; loss_discriminator keeps the value from
            # the most recent joint step (t == 0 is always a joint step).
            (loss_generator, _, class_acc, disc_acc,
             variance_batch) = sess.run(
                 [generator_loss, generator_optim, acc_op, acc_disc, variance],
                 feed_dict=feed)
        progress.set_description('GEN LOSS ===> ' + str(loss_generator) +
                                 ' DIS LOSS ===> ' + str(loss_discriminator) +
                                 '  CLASS ACC ===> ' + str(class_acc) +
                                 '  DISC ACC ===> ' + str(disc_acc) +
                                 '  VAR ===> ' + str(variance_batch))
        progress.refresh()

        with open(join(models_directory, "Model.csv"), 'a') as f:
            f.write(
                str(loss_generator) + "," + str(loss_discriminator) + "," +
                str(class_acc) + "," + str(disc_acc) + "," +
                str(variance_batch) + "\n")
        # trange yields 0 .. total_steps - 1, so the last step is
        # total_steps - 1; comparing against total_steps never matched and
        # the final weights were never checkpointed.
        if t % BATCHES_PER_EPOCH == 0 or t == total_steps - 1:
            print("Epoch Completed")
            print("Making Music")
            filename = "model-" + str(
                (t * BATCH_SIZE) / data.num_examples) + "-" + str(class_acc)
            saver.save(sess, join(models_directory, filename))
            generated_music, generated_genre = sess.run(
                [binarized_out, input_genre], feed_dict=feed)
            print("NUM NOTES", np.sum(generated_music))
            export_songs(generated_music, generated_genre, t)

    # Final sample from the last batch: print one raw and one binarised
    # slice for inspection, then export the batch.
    generator_out_batch, generated_music, generated_genre = sess.run(
        [generator_out, binarized_out, input_genre], feed_dict=feed)
    print(generator_out_batch[0, 1, :, :, 1])
    print('\n\n')
    print(generated_music[0, 1, :, :, 1])
    export_songs(generated_music, generated_genre, t)
Example #46
0
File: vae.py Project: ms96590/dl
            # https://stats.stackexchange.com/questions/7440/kl-divergence-between-two-univariate-gaussians
            kl_div = -0.5 * (log_var + 1 - mu**2 - tf.exp(log_var))
            kl_div = tf.reduce_sum(kl_div) / x.shape[0]

            loss = rec_loss + 1. * kl_div

        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))


        if step % 100 == 0:
            print(epoch, step, 'kl div:', float(kl_div), 'rec loss:', float(rec_loss))


    # evaluation
    z = tf.random.normal((batchsz, z_dim))
    logits = model.decoder(z)
    x_hat = tf.sigmoid(logits)
    x_hat = tf.reshape(x_hat, [-1, 28, 28]).numpy() *255.
    x_hat = x_hat.astype(np.uint8)
    save_images(x_hat, 'vae_images/sampled_epoch%d.png'%epoch)

    x = next(iter(test_db))
    x = tf.reshape(x, [-1, 784])
    x_hat_logits, _, _ = model(x)
    x_hat = tf.sigmoid(x_hat_logits)
    x_hat = tf.reshape(x_hat, [-1, 28, 28]).numpy() *255.
    x_hat = x_hat.astype(np.uint8)
    save_images(x_hat, 'vae_images/rec_epoch%d.png'%epoch)

def Generator(input_genre, latent_vector, LATENT_SIZE, NUM_TRACKS,
              NUM_CLASSES):
    """Build the conditional generator graph and return its sigmoid output.

    The genre id is embedded into a LATENT_SIZE vector and multiplied
    element-wise with the latent vector. The product goes through a shared
    stack of transposed 3-D convolutions, then through two private branches
    per track (kernel (1, 1, 12) then (1, 6, 1), and the reverse order)
    whose outputs are concatenated on the last axis and merged by a 1x1x1
    transposed convolution. The per-track results are concatenated on the
    last axis and squashed with a sigmoid.

    Args:
        input_genre: integer genre ids fed to the embedding layer.
        latent_vector: noise tensor multiplied with the genre embedding.
        LATENT_SIZE: output dimension of the genre embedding.
        NUM_TRACKS: number of private per-track sub-generators.
        NUM_CLASSES: number of genres (input dimension of the embedding).

    Returns:
        The sigmoid of the concatenated per-track tensors.
    """
    def leaky(x):
        return tf.nn.leaky_relu(x, alpha=.5)

    def batch_norm(tensor, name):
        return tf.layers.batch_normalization(tensor, name=name)

    def upsample(tensor, filters, kernel, strides, name):
        # Transposed 3-D convolution followed by the leaky ReLU above.
        return tf.layers.conv3d_transpose(tensor,
                                          filters,
                                          kernel,
                                          strides,
                                          activation=leaky,
                                          name=name)

    # Class embedding, fused multiplicatively with the latent vector.
    genre_embedding = tf.keras.layers.Embedding(
        NUM_CLASSES,
        LATENT_SIZE,
        embeddings_initializer='glorot_normal',
        name='generator_embedding')
    conditioned = tf.multiply(latent_vector,
                              genre_embedding(input_genre),
                              name='generator_multiply')
    net = batch_norm(conditioned, 'generator_batch_norm_0')

    # Shared generator: dense -> reshape -> five transposed convolutions,
    # each followed by batch normalisation.
    net = tf.layers.dense(net, (3 * 128),
                          activation=leaky,
                          name='generator_dense_1')
    net = batch_norm(net, 'generator_batch_norm_1')
    net = tf.reshape(net, [-1, 3, 1, 1, 128], name='generator_reshape_1')
    shared_stack = (
        # (filters, kernel, strides)
        (64, (2, 1, 1), (1, 1, 1)),
        (32, (1, 4, 1), (1, 4, 1)),
        (32, (1, 1, 3), (1, 1, 3)),
        (16, (1, 4, 1), (1, 4, 1)),
        (16, (1, 1, 3), (1, 1, 2)),
    )
    for layer_no, (filters, kernel, strides) in enumerate(shared_stack, 1):
        net = upsample(net, filters, kernel, strides,
                       'generator_transconv3d_' + str(layer_no))
        # Batch-norm layers are numbered one ahead of their convolution.
        net = batch_norm(net, 'generator_batch_norm_' + str(layer_no + 1))
    shared_out = net

    def private_branch(tag, first_kernel, second_kernel, track_num):
        # Two conv/batch-norm pairs; each stride equals its kernel size.
        suffix = str(track_num)
        branch = upsample(shared_out, 16, first_kernel, first_kernel,
                          'generator_' + tag + '_conv3d_1' + suffix)
        branch = batch_norm(branch, 'generator_' + tag + '_bn_1' + suffix)
        branch = upsample(branch, 8, second_kernel, second_kernel,
                          'generator_' + tag + '_conv3d_2' + suffix)
        return batch_norm(branch, 'generator_' + tag + '_bn_2' + suffix)

    # Private generators, one pair of branches per track.
    tracks = []
    for track in range(NUM_TRACKS):
        pitch_time = private_branch('pt', (1, 1, 12), (1, 6, 1), track)
        time_pitch = private_branch('tp', (1, 6, 1), (1, 1, 12), track)
        both = tf.concat([pitch_time, time_pitch], -1)
        # Merge the two branches into one channel (no activation here).
        merged = tf.layers.conv3d_transpose(
            both,
            1, (1, 1, 1), (1, 1, 1),
            name=('generator_merged_conv3d' + str(track)))
        tracks.append(batch_norm(merged, 'generator_merged_bn' + str(track)))

    return tf.sigmoid(tf.concat(tracks, -1))
def attention_decoder(decoder_inputs,
                      initial_state,
                      encoder_states,
                      cell,
                      encoder_section_states=None,
                      num_words_section=None,
                      enc_padding_mask=None,
                      enc_section_padding_mask=None,
                      initial_state_attention=False,
                      pointer_gen=True,
                      use_coverage=False,
                      prev_coverage=None,
                      temperature=None):
    """Unroll an attention decoder, optionally with hierarchical attention.

    Args:
        decoder_inputs: A list of 2D Tensors [batch_size x input_size].
        initial_state: 2D Tensor [batch_size x cell.state_size].
        encoder_states: 3D Tensor
            [batch_size x seq_len x encoder_output_size].
        cell: rnn_cell.RNNCell defining the cell function and size.
        encoder_section_states: 3D Tensor
            [batch_size x section_seq_len x encoder_output_size]. Pass None
            if you don't want hierarchical attentive decoding.
        num_words_section: number of words per section
            [batch_size x section_seq_len]. Only entry [0][0] is read, i.e.
            every section is assumed to hold the same number of words.
        enc_padding_mask: 2D Tensor [batch_size x attn_length] containing 1s
            and 0s; indicates which of the encoder locations are padding (0)
            or a real token (1).
        enc_section_padding_mask: 3D Tensor
            [batch_size x num_sections x section_len]. When given, it is
            flattened and used instead of enc_padding_mask.
        initial_state_attention: Note that this attention decoder passes each
            decoder input through a linear layer with the previous step's
            context vector to get a modified version of the input. If
            initial_state_attention is False, on the first decoder step the
            "previous context vector" is just a zero vector. If it is True,
            initial_state is used to (re)calculate the previous step's
            context vector. Set this to False for train/eval mode (one call
            for all decoder steps) and True for decode mode (one call per
            decoder step).
        pointer_gen: boolean. If True, calculate the generation probability
            p_gen for each decoder step.
        use_coverage: boolean. If True, use coverage mechanism.
        prev_coverage: If not None, a tensor with shape (batch_size, seq_len).
            The previous step's coverage vector. This is only not None in
            decode mode when using coverage.
        temperature: Optional Python number. In hierarchical mode, when it is
            not None and greater than 0, the section-weighted word scores are
            multiplied by it before the softmax. None (the default) or a
            non-positive value leaves the scores unscaled.

    Returns:
        A 6-tuple (outputs, state, attn_dists, p_gens, coverage,
        attn_dists_sec_list):
        outputs: A list of the same length as decoder_inputs of 2D Tensors of
            shape [batch_size x cell.output_size]. The output vectors.
        state: The final state of the decoder. A tensor shape
            [batch_size x cell.state_size].
        attn_dists: A list containing tensors of shape (batch_size, seq_len).
            The attention distributions for each decoder step.
        p_gens: List with one tensor of shape [batch_size, 1] per decoder
            step. Empty list if pointer_gen=False.
        coverage: Coverage vector on the last step computed, reshaped to
            (batch_size, -1). None if use_coverage=False.
        attn_dists_sec_list: List of section-level attention distributions,
            one per decoder step. Empty when not using hierarchical attention.

    Raises:
        ValueError: if the input size of a decoder input cannot be inferred.
    """
    print('encoder_states.shape', encoder_states.shape)
    print('decoder_inputs[0].shape', decoder_inputs[0].shape)
    with variable_scope.variable_scope("attention_decoder"):
        batch_size = encoder_states.get_shape(
        )[0].value  # if this line fails, it's because the batch size isn't defined
        enc_output_size = encoder_states.get_shape(
        )[2].value  # encoder state size, if this line fails, it's because the attention length isn't defined

        # Indicator variable for hierarchical attention
        hier = encoder_section_states is not None

        # Reshape encoder_states (need to insert a dim)
        encoder_states = tf.expand_dims(
            encoder_states,
            axis=2)  # now is shape (batch_size, attn_len, 1, enc_output_size)

        # To calculate attention, we calculate
        #   v^T tanh  (W_h h_i + W_s s_t + b_attn)
        # where h_i is an encoder state, and s_t a decoder state.
        # attn_vec_size is the length of the vectors v, b_attn, (W_h h_i) and (W_s s_t).
        # (W_h h_i) is encoder_features, (W_s s_t) + b_att is decoder_features
        # We set it to be equal to the size of the encoder states.
        attention_vec_size = enc_output_size

        # Get the weight matrix W_h and apply it to each encoder state to get (W_h h_i), the encoder features
        # To multiply batch_size number of time_step sizes of encoder states
        # by W_h, we can use conv2d with stride of 1
        W_h = variable_scope.get_variable(
            "W_h", [1, 1, enc_output_size, attention_vec_size])
        encoder_features = nn_ops.conv2d(
            encoder_states, W_h, [1, 1, 1, 1],
            "SAME")  # shape (batch_size,seq_len,1,attention_vec_size)

        if hier:
            enc_sec_output_size = encoder_section_states.get_shape()[2].value
            encoder_section_states = tf.expand_dims(encoder_section_states,
                                                    axis=2)
            W_h_s = variable_scope.get_variable(
                "W_h_s", [1, 1, enc_sec_output_size, attention_vec_size])
            encoder_section_features = nn_ops.conv2d(
                encoder_section_states, W_h_s, [1, 1, 1, 1],
                "SAME")  # shape (batch_size,seq_len,1,attention_vec_size)
            v_sec = variable_scope.get_variable("v_sec", [attention_vec_size])

        # Get the weight vectors v and w_c (w_c is for coverage)
        # v^T tanh  (W_h h_i + W_s s_t + W_c c_t + b_attn)
        # c_t = \sum_{i=1}^{t-1} a^i  (sum of all attention weights in the previous step) shape=(batch_size, seq_len)
        v = variable_scope.get_variable("v", [attention_vec_size])
        if use_coverage:
            with variable_scope.variable_scope("coverage"):
                w_c = variable_scope.get_variable(
                    "w_c", [1, 1, 1, attention_vec_size])

        if prev_coverage is not None:  # for beam search mode with coverage
            # reshape from (batch_size, seq_len) to (batch_size, attn_len, 1, 1)
            prev_coverage = tf.expand_dims(tf.expand_dims(prev_coverage, 2), 3)

        def attention(decoder_state,
                      coverage=None,
                      num_words_section=None,
                      step=None):
            """Calculate the context vector and attention distribution from the decoder state.

            Args:
                decoder_state: state of the decoder
                coverage: Optional. Previous timestep's coverage vector,
                    shape (batch_size, attn_len, 1, 1).
                num_words_section: number of words in each section (only
                    needed for hierarchical attention)
                    [batch_size, num_sections] -- assumes number of sections
                    in the batch is equal (TODO: check sanity)
                step: index of the current decoder step (currently unused)

            Returns:
                context_vector: weighted sum of encoder_states
                attn_dist: attention distribution
                coverage: new coverage vector. shape (batch_size, attn_len, 1, 1)
                attn_dist_sec: section attention distribution; only returned
                    (as a fourth element) in hierarchical mode
            """
            with variable_scope.variable_scope("Attention"):
                # Pass the decoder state through a linear layer (this is W_s s_t + b_attn in the paper)
                # (W_s s_t) + b_att is decoder_features; s_t = decoder_state
                decoder_features = linear(
                    decoder_state, attention_vec_size,
                    True)  # shape (batch_size, attention_vec_size)
                decoder_features = tf.expand_dims(
                    tf.expand_dims(decoder_features, 1),
                    1)  # reshape to (batch_size, 1, 1, attention_vec_size)

                def masked_attention(e, enc_padding_mask):
                    """Take softmax of e then apply enc_padding_mask and re-normalize.

                    If enc_section_padding_mask was supplied, it is flattened
                    to [batch_size, -1], cast to float32 and used as the mask
                    instead of the enc_padding_mask argument.
                    """
                    if enc_section_padding_mask is not None:
                        enc_padding_mask = tf.reshape(enc_section_padding_mask,
                                                      [batch_size, -1])
                        enc_padding_mask = tf.cast(enc_padding_mask,
                                                   tf.float32)
                    attn_dist = nn_ops.softmax(
                        e)  # take softmax. shape (batch_size, attn_length)
                    attn_dist *= enc_padding_mask  # apply mask
                    masked_sums = tf.reduce_sum(attn_dist,
                                                axis=1)  # shape (batch_size)
                    return attn_dist / tf.reshape(masked_sums,
                                                  [-1, 1])  # re-normalize

                def section_attention():
                    """Return the softmax attention over sections."""
                    with tf.variable_scope("attention_sections"):
                        if FLAGS.fixed_attn:
                            # Separate projection of the decoder state for
                            # the section-level scores.
                            tf.logging.debug('running with fixed attn')
                            sec_features = linear(
                                decoder_state,
                                attention_vec_size,
                                True,
                                scope='Linear--Section-Features'
                            )  # shape (batch_size, attention_vec_size)
                            sec_features = tf.expand_dims(
                                tf.expand_dims(sec_features, 1), 1
                            )  # reshape to (batch_size, 1, 1, attention_vec_size)
                        else:
                            # Share the word-level decoder features.
                            sec_features = decoder_features
                        e_sec = math_ops.reduce_sum(
                            v_sec *
                            math_ops.tanh(encoder_section_features +
                                          sec_features),
                            [2, 3])  # [batch_size x seq_len_sections]
                        return nn_ops.softmax(e_sec)

                def reweight_by_section(e, attn_dist_sec):
                    """Scale word scores by their section weight (and temperature)."""
                    # Group the word scores per section; every section is
                    # assumed to hold num_words_section[0][0] words.
                    e = tf.reshape(
                        e, [batch_size, -1, num_words_section[0][0]])
                    e = tf.multiply(e, attn_dist_sec[:, :, tf.newaxis])
                    e = tf.reshape(e, [batch_size, -1])

                    # Hack for reweighting attention (similar to a softmax
                    # temperature). Guard against the default None, which
                    # cannot be ordered against a float on Python 3.
                    if temperature is not None and temperature > 0.0:
                        e = e * temperature
                    return e

                if use_coverage and coverage is not None:  # non-first step of coverage
                    if hier:
                        attn_dist_sec = section_attention()
                        with tf.variable_scope("attention_words"):
                            coverage_features = nn_ops.conv2d(
                                coverage, w_c, [1, 1, 1, 1], "SAME"
                            )  # c has shape (batch_size, seq_len, 1, attention_vec_size)

                            # Calculate v^T tanh(W_h h_i + W_s s_t + w_c c_i^t + b_attn)
                            e = math_ops.reduce_sum(
                                v * math_ops.tanh(encoder_features +
                                                  decoder_features +
                                                  coverage_features),
                                [2, 3])  # shape (batch_size,seq_len)

                            # Multiply by section weights
                            e = reweight_by_section(e, attn_dist_sec)

                            attn_dist = masked_attention(e, enc_padding_mask)
                            coverage += array_ops.reshape(
                                attn_dist,
                                [batch_size, -1, 1, 1
                                 ])  # shape=(batch_size, seq_len,1,1)
                    else:
                        # Multiply coverage vector by w_c to get coverage_features.
                        coverage_features = nn_ops.conv2d(
                            coverage, w_c, [1, 1, 1, 1], "SAME"
                        )  # c has shape (batch_size, seq_len, 1, attention_vec_size)

                        # Calculate v^T tanh(W_h h_i + W_s s_t + w_c c_i^t + b_attn)
                        e = math_ops.reduce_sum(
                            v *
                            math_ops.tanh(encoder_features + decoder_features +
                                          coverage_features),
                            [2, 3])  # shape (batch_size,seq_len)

                        # Take softmax of e to get the attention distribution
                        attn_dist = masked_attention(e, enc_padding_mask)

                        # Update coverage vector
                        coverage += array_ops.reshape(
                            attn_dist, [batch_size, -1, 1, 1
                                        ])  # shape=(batch_size, seq_len,1,1)
                else:
                    # Calculate v^T tanh(W_h h_i + W_s s_t + b_attn)
                    if hier:
                        attn_dist_sec = section_attention()
                        with tf.variable_scope("attention_words"):
                            e = math_ops.reduce_sum(
                                v * math_ops.tanh(encoder_features +
                                                  decoder_features),
                                [2, 3])  # [batch_size x seq_len]

                            e = reweight_by_section(e, attn_dist_sec)

                            attn_dist = masked_attention(e, enc_padding_mask)
                    else:
                        e = math_ops.reduce_sum(
                            v *
                            math_ops.tanh(encoder_features + decoder_features),
                            [2, 3])  # calculate e
                        # Take softmax of e to get the attention distribution
                        if enc_padding_mask is not None:
                            attn_dist = masked_attention(e, enc_padding_mask)
                        else:
                            attn_dist = nn_ops.softmax(
                                e)  # shape (batch_size, seq_len)

                    if use_coverage:  # first step of training
                        coverage = tf.expand_dims(tf.expand_dims(attn_dist, 2),
                                                  2)  # initialize coverage

                # TODO: coverage for hier

                # Calculate the context vector from attn_dist and encoder_states
                # encoder_states = [batch, seq_len, 1, encoder_output_size], attn_dist = [batch, seq_len, 1, 1]
                context_vector = math_ops.reduce_sum(
                    array_ops.reshape(attn_dist, [batch_size, -1, 1, 1]) *
                    encoder_states,
                    [1, 2])  # shape (batch_size, enc_output_size).
                context_vector = array_ops.reshape(context_vector,
                                                   [-1, enc_output_size])

            if hier:
                return context_vector, attn_dist, coverage, attn_dist_sec
            else:
                return context_vector, attn_dist, coverage

        outputs = []
        attn_dists = []
        attn_dists_sec_list = []
        p_gens = []
        state = initial_state
        coverage = prev_coverage  # initialize coverage to None or whatever was passed in
        context_vector = array_ops.zeros([batch_size, enc_output_size])
        context_vector.set_shape([
            None, enc_output_size
        ])  # Ensure the second shape of attention vectors is set.
        if initial_state_attention:  # true in decode mode
            # Re-calculate the context vector from the previous step so that we can pass it through a linear layer with this step's input to get a modified version of the input
            if hier:
                context_vector, attn_dist, coverage, attn_dist_sec = attention(
                    initial_state, coverage, num_words_section
                )  # in decode mode, this is what updates the coverage vector
            else:
                context_vector, _, coverage = attention(
                    initial_state, coverage
                )  # in decode mode, this is what updates the coverage vector
        for i, inp in enumerate(decoder_inputs):
            # Progress indicator, overwritten in place on every step.
            print("Adding attention_decoder timesteps. %i done of %i" %
                  (i + 1, len(decoder_inputs)),
                  end='\r')
            if i > 0:
                variable_scope.get_variable_scope().reuse_variables()

            # Merge input and previous attentions into one vector x of the same size as inp
            # inp is [batch_size, input_size]
            input_size = inp.get_shape().with_rank(2)[1]
            if input_size.value is None:
                raise ValueError("Could not infer input size from input: %s" %
                                 inp.name)
            x = linear([inp] + [context_vector], input_size, True)

            # Run the decoder RNN cell. cell_output = decoder state
            cell_output, state = cell(x, state)

            # Run the attention mechanism.
            if i == 0 and initial_state_attention:  # always true in decode mode
                with variable_scope.variable_scope(
                        variable_scope.get_variable_scope(), reuse=True
                ):  # you need this because you've already run the initial attention(...) call
                    if hier:
                        # NOTE(review): unlike the non-hierarchical branch
                        # below, this call does update coverage -- confirm
                        # that is intended.
                        context_vector, attn_dist, coverage, attn_dist_sec = attention(
                            state, coverage, num_words_section)
                    else:
                        context_vector, attn_dist, _ = attention(
                            state, coverage)  # don't allow coverage to update
            else:
                if hier:
                    context_vector, attn_dist, coverage, attn_dist_sec = attention(
                        state, coverage, num_words_section)
                else:
                    context_vector, attn_dist, coverage = attention(
                        state, coverage)

            attn_dists.append(attn_dist)
            if hier:
                attn_dists_sec_list.append(attn_dist_sec)

            # Calculate p_gen
            if pointer_gen:
                with tf.variable_scope('calculate_pgen'):
                    # state.c / state.h: the cell state is expected to be an
                    # LSTM state tuple here.
                    p_gen = linear([context_vector, state.c, state.h, x], 1,
                                   True)  # a scalar
                    p_gen = tf.sigmoid(p_gen)
                    p_gens.append(p_gen)

            # Concatenate the cell_output (= decoder state) and the context vector, and pass them through a linear layer
            # This is V[s_t, h*_t] + b in the paper
            with variable_scope.variable_scope("AttnOutputProjection"):
                output = linear([cell_output] + [context_vector],
                                cell.output_size, True)
            outputs.append(output)

        # If using coverage, reshape it
        if coverage is not None:
            coverage = array_ops.reshape(coverage, [batch_size, -1])
        return outputs, state, attn_dists, p_gens, coverage, attn_dists_sec_list
Example #49
0
 def sigmoid(self, x, name=None):
     """Apply ``tf.sigmoid`` to ``x``, forwarding the optional op ``name``."""
     activated = tf.sigmoid(x, name=name)
     return activated
def pass_through_sigmoid(x, slope=1):
    """Sigmoid whose gradient is overridden with the identity function.

    The op is created inside a "PassThroughSigmoid" name scope while the
    default graph maps the "Sigmoid" gradient to "Identity".

    Note: ``slope`` is accepted but not used by this implementation.
    """
    graph = tf.get_default_graph()
    gradient_override = {"Sigmoid": "Identity"}
    with ops.name_scope("PassThroughSigmoid") as scope_name:
        with graph.gradient_override_map(gradient_override):
            return tf.sigmoid(x, name=scope_name)
# Second convolution layer: conv + bias, ReLU, then 3x3 max pooling.
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_3x3(h_conv2)
# Fully connected layer
# Add a fully connected layer to process the whole image. (The original
# comment mentioned 512 neurons; the weights below define 20 units.)
W_fc1 = weight_variable([3 * 3 * 64, 20])
b_fc1 = bias_variable([20])
# Flatten the pooled feature maps to one row of 3*3*64 values per example.
h_pool2_flat = tf.reshape(h_pool2, [-1, 3 * 3 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
# To reduce overfitting, apply dropout before the output layer.
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
# Output layer: a single sigmoid unit per example.
W_fc2 = weight_variable([20, 1])
b_fc2 = bias_variable([1])
y_conv = tf.sigmoid(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
# Loss function
# NOTE(review): only the y_ * log(y_conv) term is summed; there is no
# (1 - y_) * log(1 - y_conv) term and no epsilon inside the log -- confirm
# this is the intended loss for a single sigmoid output.
cross_entropy = -tf.reduce_sum(y_ * tf.log(y_conv))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
#cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv), reduction_indices=[1]))
#train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
# NOTE(review): y_conv has a single column (W_fc2 is [20, 1]), so
# tf.argmax(y_conv, 1) is always 0 -- verify this accuracy metric is meaningful.
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Initialize variables
sess.run(tf.global_variables_initializer())
'''
分割训练数据
'''


def split_data(df_vec):
# Load a serialized GraphDef from ./networks/<net_name>/ and import it into
# the default graph without a name prefix.
net_name = 'squeeze_normal-drone-dev'
folder_name = './networks/%s' % net_name
with gfile.FastGFile(folder_name + "/minimal_graph_quant.pb", 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
    tf.import_graph_def(graph_def, name='')

# Look up the input batch tensor and the activation tensor by name.
sq_graph = tf.get_default_graph()
inp_batch = sq_graph.get_tensor_by_name('Input_batching/batch:0')
t_activations = sq_graph.get_tensor_by_name('activation/activations:0')
print(inp_batch)

# Split the last axis of the activations into three consecutive channel
# groups, with k = p.ANCHOR_COUNT:
#   [0, 4k)                      -> t_deltas
#   [4k, 5k)                     -> t_gammas (passed through a sigmoid)
#   [5k, 5k + OUT_CLASSES * k)   -> t_classes
k = p.ANCHOR_COUNT
t_deltas = tf.slice(t_activations, [0, 0, 0, 0], [-1, -1, -1, 4 * k])
t_gammas = tf.sigmoid(
    tf.slice(t_activations, [0, 0, 0, 4 * k], [-1, -1, -1, k]))
t_classes = tf.slice(t_activations, [0, 0, 0, 5 * k],
                     [-1, -1, -1, p.OUT_CLASSES * k])

# Index of the largest gamma along the last axis at every position.
t_chosen_anchor = tf.argmax(t_gammas, axis=3)

# Tensors gathered into one list (presumably fetched together in a single
# session run -- the call is not visible here).
all_out = [t_deltas, t_gammas, t_classes, t_chosen_anchor]

sess = tf.Session()
batch_size = 1
# end='' keeps the cursor on the same line as this message.
print('loading image.. ', end='')


def read_resize(pic):
    """Read the image at `pic` with `misc.imread` and resize it to (256, 256)."""
    image = misc.imread(pic)
    return misc.imresize(image, (256, 256))
def cw(model, x, y=None, eps=1.0, ord_=2, T=2,
       optimizer=tf.train.AdamOptimizer(learning_rate=0.1), alpha=0.9,
       min_prob=0, clip=(0.0, 1.0)):
    """Build the graph for a Carlini-Wagner (CW) attack.

    Only the L2 and L-inf variants are implemented.  See
    https://arxiv.org/abs/1608.04644 for details.

    The attack minimizes a loss made of two parts: (a) a distance term that
    keeps the adversarial image close to the original one, and (b) a term
    that pushes the model towards the target class.  CW is an optimization
    process with many hyper-parameters; the binary search over ``eps`` from
    the paper is not performed here.

    :param model: The model wrapper.  Called as ``model(xadv, logits=True)``
        and expected to return ``(ybar, logits)``.
    :param x: The clean input, usually a placeholder.  Its shape MUST be
        static, because the noise variable is created with that shape.
    :param y: The target label (scalar or per-sample).  When None, the
        label with the smallest ``ybar`` value is used as the target.
    :param eps: Scaling factor for the classification term of the loss.
    :param ord_: The norm, 2 or inf.  Anything other than 2 selects the
        L-inf branch.
    :param T: Temperature of the sigmoid used to map between input space
        and the unconstrained noise space.  The paper's
        ``(tanh(x)+1)/2 = sigmoid(2x)`` corresponds to ``T=2``.
    :param optimizer: Optimizer used to minimize the CW loss over the noise
        variable only.
    :param alpha: Decreasing factor used in the L-inf branch.
    :param min_prob: Margin added inside the classification term.
        Generally a larger value results in more noise.
    :param clip: A tuple ``(clip_min, clip_max)`` giving the value range
        of ``x``.

    :return: A tuple ``(train_op, xadv, noise)``.  Run ``train_op`` for some
        epochs, then evaluate ``xadv`` for the adversarial image.  ``noise``
        lives in sigmoid-space and is returned so that it can be cleared
        before each batched attack.
    """
    clip_min, clip_max = clip[0], clip[1]
    value_range = clip_max - clip_min

    x_dims = x.get_shape().as_list()
    noise = tf.get_variable('noise', x_dims, tf.float32,
                            initializer=tf.initializers.zeros)

    # Rescale the input to (0, 1), then move to sigmoid-space.  The clipping
    # keeps the logit finite at the borders.
    unit_x = (x - clip_min) / value_range
    squashed = tf.clip_by_value(unit_x, 1e-8, 1-1e-8)
    x_logit = tf.log(squashed / (1 - squashed)) / T

    # Perturb in sigmoid-space and map the result back to the input range.
    xadv = tf.sigmoid(T * (x_logit + noise))
    xadv = xadv * value_range + clip_min

    ybar, logits = model(xadv, logits=True)
    num_classes = ybar.get_shape().as_list()[1]

    if y is None:
        # No target given: aim for the least-likely label.
        target = tf.argmin(ybar, axis=1, output_type=tf.int32)
    else:
        # Broadcast a scalar label over the batch; keep a vector as is.
        target = tf.cond(tf.equal(tf.rank(y), 0),
                         lambda: tf.fill([x_dims[0]], y),
                         lambda: tf.identity(y))

    # Subtracting +inf everywhere except at the target leaves only the
    # target logit as the maximum.
    off_target = tf.one_hot(target, num_classes,
                            on_value=0.0, off_value=float('inf'))
    target_logit = tf.reduce_max(logits - off_target, axis=1)
    best_logit = tf.reduce_max(logits, axis=1)

    # Classification term: zero once the target logit leads by min_prob.
    loss0 = tf.nn.relu(best_logit - target_logit + min_prob)

    sample_axes = list(range(1, len(x_dims)))
    norm_order = float(ord_)

    # Distance term: keep the adversarial image visually close.
    tau_update = None
    if norm_order == 2:
        # CW-L2.  The paper sums the squared differences per sample; the
        # mean over all elements is used here instead.
        loss1 = tf.reduce_mean(tf.square(xadv-x))
    else:
        # CW-Linf
        tau_init = tf.fill([x_dims[0]] + [1]*len(sample_axes), clip_max)
        tau_var = tf.get_variable('cw8-noise-upperbound', dtype=tf.float32,
                                  initializer=tau_init, trainable=False)
        diff = xadv - x - tau_var

        # alpha where every element of a sample is below the bound, 0
        # otherwise.  The value is only grouped with the train op below; it
        # is not assigned back to the variable.
        tau_update = alpha * tf.to_float(
            tf.reduce_all(diff < 0, axis=sample_axes))
        loss1 = tf.nn.relu(tf.reduce_sum(diff, axis=sample_axes))

    loss = eps*loss0 + loss1
    train_op = optimizer.minimize(loss, var_list=[noise])

    # In the L-inf case, evaluate the tau expression with every train step.
    # Refer to the CW-Linf section in the original paper.
    if tau_update is not None:
        train_op = tf.group(train_op, tau_update)

    return train_op, xadv, noise
Example #54
0
# Per-step placeholders, filled in by the unrolled loop below.
cs = [0] * T  # sequence of canvases
mus, logsigmas, sigmas = [0] * T, [0] * T, [
    0
] * T  # gaussian params generated by SampleQ. We will need these for computing loss.
# initial states
h_dec_prev = tf.zeros((batch_size, dec_size))
enc_state = lstm_enc.zero_state(batch_size, tf.float32)
dec_state = lstm_dec.zero_state(batch_size, tf.float32)

## DRAW MODEL ##

# construct the unrolled computational graph
for t in range(T):
    # The canvas starts at zero and accumulates the decoder's writes.
    c_prev = tf.zeros((batch_size, img_size)) if t == 0 else cs[t - 1]
    x_hat = x - tf.sigmoid(c_prev)  # error image
    r = read(x, x_hat, h_dec_prev)
    # The encoder sees the read output concatenated with the previous
    # decoder output along axis 1.
    h_enc, enc_state = encode(enc_state, tf.concat([r, h_dec_prev], 1))
    z, mus[t], logsigmas[t], sigmas[t] = sampleQ(h_enc)
    h_dec, dec_state = decode(dec_state, z)
    cs[t] = c_prev + write(h_dec)  # store results
    h_dec_prev = h_dec
    # Set after the first pass so that later iterations reuse the variables
    # (presumably read by the helper functions -- they are not visible here).
    DO_SHARE = True  # from now on, share variables

## LOSS FUNCTION ##


def binary_crossentropy(t, o):
    """Element-wise binary cross-entropy between targets `t` and outputs `o`.

    The module-level `eps` is added inside both logarithms to keep them
    finite.
    """
    positive_term = t * tf.log(o + eps)
    negative_term = (1.0 - t) * tf.log(1.0 - o + eps)
    return -(positive_term + negative_term)

Example #55
0
def gated_conv2d(X,
                 K_h,
                 K_w,
                 K_c,
                 strides=(1, 1, 1, 1),
                 padding='SAME',
                 mask=None,
                 cond_h=None,
                 vertical_h=None):
    """Gated convolution: ``tanh(h_f) * sigmoid(h_g)`` on top of a conv layer.

    One convolution produces ``2 * K_c`` pre-activation channels.  Optional
    vertical-stack pre-activations and an optional conditioning vector are
    added to those pre-activations, which are then split in half along the
    last axis into the ``tanh`` features and the ``sigmoid`` gate.

    Parameters
    ----------
    X : tf.Tensor
        Input to the convolution.  Its last static dimension is used as the
        kernel's input-channel count.
    K_h : int
        Kernel height.
    K_w : int
        Kernel width.
    K_c : int
        Number of channels of the gated output ``y``; the pre-activations
        ``h`` carry ``2 * K_c`` channels.
    strides : sequence of int, optional
        Strides forwarded to ``tf.nn.conv2d`` (for both the main convolution
        and the 1x1 convolution applied to ``vertical_h``).
    padding : str, optional
        Padding mode forwarded to ``tf.nn.conv2d``.
    mask : tf.Tensor or array, optional
        If given, multiplied element-wise into the kernel before convolving.
    cond_h : tf.Tensor, optional
        Conditioning input; it is matrix-multiplied with a
        ``[cond_h.shape[1], 2 * K_c]`` projection, so it is treated as 2-D
        with one row per example.  The projected vector is broadcast over
        the two spatial axes of ``h``.
    vertical_h : tf.Tensor, optional
        Pre-activations with ``2 * K_c`` channels that are passed through a
        1x1 convolution and added to ``h``.

    Returns
    -------
    y : tf.Tensor
        Gated activations, ``K_c`` channels.
    h : tf.Tensor
        Pre-activations before gating, ``2 * K_c`` channels.
    """
    strides = list(strides)
    n_preact = K_c * 2

    with tf.variable_scope('masked_cnn'):
        W = tf.get_variable(
            name='W',
            shape=[K_h, K_w, X.shape[-1].value, n_preact],
            initializer=tf.contrib.layers.xavier_initializer_conv2d())
        b = tf.get_variable(name='b',
                            shape=[n_preact],
                            initializer=tf.zeros_initializer())
        if mask is not None:
            W = tf.multiply(mask, W)
        # Initial convolution with masked kernel
        h = tf.nn.bias_add(
            tf.nn.conv2d(X, W, strides=strides, padding=padding), b)

    # Combine the horizontal stack's pre-activations to our hidden embedding before
    # applying the split nonlinearities.  Check Figure 2 for details.
    if vertical_h is not None:
        with tf.variable_scope('vtoh'):
            W_vtoh = tf.get_variable(
                name='W',
                shape=[1, 1, n_preact, n_preact],
                initializer=tf.contrib.layers.xavier_initializer_conv2d())
            b_vtoh = tf.get_variable(name='b',
                                     shape=[n_preact],
                                     initializer=tf.zeros_initializer())
            h = tf.add(
                h,
                tf.nn.bias_add(
                    tf.nn.conv2d(vertical_h,
                                 W_vtoh,
                                 strides=strides,
                                 padding=padding), b_vtoh))

    # Condition on some given data
    if cond_h is not None:
        with tf.variable_scope('conditioning'):
            # The projection must cover both halves (tanh features and
            # sigmoid gate), hence 2 * K_c outputs so it lines up with `h`.
            V = tf.get_variable(
                name='V',
                shape=[cond_h.shape[1].value, n_preact],
                initializer=tf.contrib.layers.xavier_initializer_conv2d())
            b_cond = tf.get_variable(name='b',
                                     shape=[n_preact],
                                     initializer=tf.zeros_initializer())
            projected = tf.nn.bias_add(tf.matmul(cond_h, V), b_cond)
            # Reshape to [batch, 1, 1, 2 * K_c] so the per-example vector
            # broadcasts over the spatial axes.  (Adding a Python list to a
            # `tf.shape` slice is an element-wise add, not a concatenation,
            # so the target shape is spelled out explicitly instead.)
            h = tf.add(h,
                       tf.reshape(projected, [-1, 1, 1, n_preact]),
                       name='h')

    with tf.variable_scope('gated_cnn'):
        # Finally slice and apply gated multiplier
        h_f = tf.slice(h, [0, 0, 0, 0], [-1, -1, -1, K_c])
        h_g = tf.slice(h, [0, 0, 0, K_c], [-1, -1, -1, K_c])
        y = tf.multiply(tf.nn.tanh(h_f), tf.sigmoid(h_g))

    return y, h
Example #56
0
    # TODO : 1. Create weights and biases of encoder's second layer. (5%)
    # Hint : use n_hidden_2
    encoder_w2 = tf.Variable(tf.truncated_normal([n_hidden_1, n_hidden_2]))
    encoder_b2 = tf.Variable(tf.zeros([n_hidden_2]))

    # TODO : 2. Create weights and biases of decoder's *second* layer. (5%)
    # Hint : pay attention to the symmetry between layers
    # (decoder_w2 maps n_hidden_2 back to n_hidden_1, mirroring encoder_w2.)
    decoder_w2 = tf.Variable(tf.truncated_normal([n_hidden_2, n_hidden_1]))
    decoder_b2 = tf.Variable(tf.zeros([n_hidden_1]))

    # Weights and biases of decoder's *first* layer.
    decoder_w1 = tf.Variable(tf.truncated_normal([n_hidden_1, NUM_FEATURES]))
    decoder_b1 = tf.Variable(tf.zeros([NUM_FEATURES]))

    # Training computation.
    encoder_l1 = tf.sigmoid(
        tf.matmul(tf_train_features, encoder_w1) + encoder_b1)
    # TODO : 3. Write the computation of encoder's second layer and decoder's *second* layer. (5%)
    # Hint : similar to encoder_l1 and decoder_l1
    # Data flow: encoder_l1 -> encoder_l2 -> decoder_l2 -> decoder_l1, with a
    # sigmoid after every layer; decoder_l1 is the reconstruction.
    encoder_l2 = tf.sigmoid(tf.matmul(encoder_l1, encoder_w2) + encoder_b2)
    decoder_l2 = tf.sigmoid(tf.matmul(encoder_l2, decoder_w2) + decoder_b2)
    decoder_l1 = tf.sigmoid(tf.matmul(decoder_l2, decoder_w1) + decoder_b1)

    # TODO : 4. Define the loss function. (5%)
    # Hint : use tf.losses.mean_squared_error()
    # Reconstruction loss between the input features and the decoder output.
    loss = tf.losses.mean_squared_error(tf_train_features, decoder_l1)

    # TODO : 5. Define a gradient descent optimizer. (5%)
    # Hint : use tf.train.GradientDescentOptimizer(...).minimize(...)
    # Note that `optimizer` is bound to the result of `.minimize(loss)`, i.e.
    # the training step to run, not to the optimizer object itself.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

# Training process.
Example #57
0
def pixel_flow(x, offset, interpolation='bilinear', name='pixel_flow'):
    """pixel_flow: an operation to reorder pixels according to offsets.

    Every output pixel is a weighted sum of the four input pixels around the
    location ``(h, w) + offset[n, h, w]``.  Sampling coordinates are clipped
    to the image bounds.

    Args:
        x (tf.Tensor): NHWC
        offset (tf.Tensor): NHW2, 2 indicates (h, w) coordinates offset,
            relative to each pixel's own position.
        interpolation: 'bilinear' uses the fractional parts of the offset as
            weights; 'softmax' passes those weights through a steep sigmoid
            (scale 10, centred at 0.5) first.
        name: name of module

    Returns:
        tf.Tensor: the resampled tensor, built from four gathers of ``x``.

    Raises:
        NotImplementedError: if ``interpolation`` is neither 'bilinear' nor
            'softmax'.

    References
    ----------
    [1] Spatial Transformer Networks: https://arxiv.org/abs/1506.02025
    [2] https://github.com/ppwwyyxx/tensorpack

    """
    # Fail fast, before any op is added to the graph.  (An `assert` on an
    # exception instance is always true and would silently fall back to
    # bilinear weights.)
    if interpolation not in ('bilinear', 'softmax'):
        raise NotImplementedError(
            "interpolation method: {} is not implemented.".format(
                interpolation))

    def absolute_coords(xs, offset):
        """Turn relative (h, w) offsets into clipped (n, h, w) indices."""
        offset = tf.cast(offset, tf.int32)
        n_add = tf.tile(tf.reshape(tf.range(xs[0]), [xs[0], 1, 1, 1]),
                        [1, xs[1], xs[2], 1])
        h_add = tf.tile(tf.reshape(tf.range(xs[1]), [1, xs[1], 1, 1]),
                        [xs[0], 1, xs[2], 1])
        w_add = tf.tile(tf.reshape(tf.range(xs[2]), [1, 1, xs[2], 1]),
                        [xs[0], xs[1], 1, 1])
        coords = offset + tf.concat([h_add, w_add], axis=3)
        coords = tf.clip_by_value(coords, 0, [xs[1] - 1, xs[2] - 1])
        return tf.concat([n_add, coords], axis=3)

    def reindex(x, offset):
        """Gather pixels of `x` at the offset locations via gather_nd."""
        coords = absolute_coords(tf.shape(input=x), offset)
        # TODO(Jiahui): gather nd is also too slow.
        return tf.gather_nd(x, coords)

    def reindex_slow(x, offset):
        """Same as `reindex`, using a flattened gather (currently unused)."""
        xs = tf.shape(input=x)
        coords = absolute_coords(xs, offset)
        x = tf.reshape(x, [-1, xs[3]])
        coords_flat = tf.reshape(coords, [-1, 3])  # (batch, height, width)
        # Row index into the [N*H*W, C] matrix is n*H*W + h*W + w.
        coords_flat = (coords_flat[:, 0] * xs[1] * xs[2] +
                       coords_flat[:, 1] * xs[2] + coords_flat[:, 2])
        sampled = tf.gather(x, coords_flat)
        return tf.reshape(sampled, xs)

    with tf.compat.v1.variable_scope(name):
        assert x.get_shape().ndims == 4 and offset.get_shape().ndims == 4

        l = tf.floor(offset)  # lower
        u = l + 1  # upper
        diff = offset - l
        neg_diff = 1.0 - diff

        lh, lw = tf.split(l, 2, axis=3)
        uh, uw = tf.split(u, 2, axis=3)

        # The two mixed corners: (lower h, upper w) and (upper h, lower w).
        lhuw = tf.concat([lh, uw], axis=3)
        uhlw = tf.concat([uh, lw], axis=3)

        diffh, diffw = tf.split(diff, 2, axis=3)
        neg_diffh, neg_diffw = tf.split(neg_diff, 2, axis=3)
        if interpolation == 'softmax':
            # Sharpen the weights towards 0/1 around the 0.5 midpoint.
            scale = 10.
            diffh = tf.sigmoid(scale * (diffh - 0.5))
            diffw = tf.sigmoid(scale * (diffw - 0.5))
            neg_diffh = tf.sigmoid(scale * (neg_diffh - 0.5))
            neg_diffw = tf.sigmoid(scale * (neg_diffw - 0.5))

        sampled = tf.add_n([
            reindex(x, l) * neg_diffw * neg_diffh,
            reindex(x, u) * diffw * diffh,
            reindex(x, lhuw) * neg_diffh * diffw,
            reindex(x, uhlw) * diffh * neg_diffw
        ],
                           name='sampled')
        return sampled
Example #58
0
def model_fn(features, labels, mode, params):
    """Estimator model function: f(x) = bias + sum(w*x) + MLP(BI(embeddings)).

    Args:
        features: dict with 'feat_ids' and 'feat_vals'; both are reshaped to
            [-1, field_size].
        labels: targets passed to the sigmoid cross-entropy loss and the AUC
            metric (unused in PREDICT mode).
        mode: a `tf.estimator.ModeKeys` value.
        params: dict providing 'field_size', 'feature_size',
            'embedding_size', 'l2_reg', 'learning_rate', 'dropout' (indexable
            keep probabilities; the last entry is used right after the
            bi-interaction layer) and 'layers' (hidden layer widths).

    Returns:
        A `tf.estimator.EstimatorSpec` for PREDICT, EVAL or TRAIN mode; for
        any other mode the function falls through and returns None.
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    # ------ hyper parameters ------
    field_size = params['field_size']
    feature_size = params['feature_size']
    embedding_size = params['embedding_size']
    l2_reg = params['l2_reg']
    learning_rate = params['learning_rate']
    dropout = params['dropout']
    layers = params['layers']

    # ------ weights ------
    Global_Bias = tf.get_variable(
        name='bias', shape=[1], initializer=tf.constant_initializer(0.0))
    Feat_Wgts = tf.get_variable(
        name='linear',
        shape=[feature_size],
        initializer=tf.glorot_normal_initializer())
    Feat_Emb = tf.get_variable(
        name='emb',
        shape=[feature_size, embedding_size],
        initializer=tf.glorot_normal_initializer())

    # ------ features ------
    feat_ids = tf.reshape(features['feat_ids'], shape=[-1, field_size])
    feat_vals = tf.reshape(features['feat_vals'], shape=[-1, field_size])

    # ------ f(x) ------
    # First-order (linear) part: one scalar weight per active feature.
    with tf.variable_scope("Linear-part"):
        linear_wgts = tf.nn.embedding_lookup(Feat_Wgts, feat_ids)
        y_linear = tf.reduce_sum(tf.multiply(linear_wgts, feat_vals), 1)

    # Bi-interaction pooling: 0.5 * ((sum v_i x_i)^2 - sum (v_i x_i)^2).
    with tf.variable_scope("BiInter-part"):
        embeddings = tf.nn.embedding_lookup(Feat_Emb, feat_ids)
        # Add a trailing axis so the values broadcast over the embedding dim.
        feat_vals = tf.reshape(feat_vals, shape=[-1, field_size, 1])
        embeddings = tf.multiply(embeddings, feat_vals)  # v_i * x_i
        sum_square_emb = tf.square(tf.reduce_sum(embeddings, 1))
        square_sum_emb = tf.reduce_sum(tf.square(embeddings), 1)
        deep_inputs = 0.5 * tf.subtract(sum_square_emb, square_sum_emb)

    with tf.variable_scope("Deep-part"):
        # The bi-interaction output is batch-normalised ...
        deep_inputs = batch_norm_layer(deep_inputs,
                                       train_phase=is_training,
                                       scope_bn="bn_after_bi")
        # ... and then goes through dropout (training only).
        if is_training:
            deep_inputs = tf.nn.dropout(deep_inputs, keep_prob=dropout[-1])

        # NOTE(review): the `weights_regularizer` terms declared on the
        # fully-connected layers are not added to `loss` below -- confirm
        # whether that is intended.
        for i, n_units in enumerate(layers):
            deep_inputs = tf.contrib.layers.fully_connected(
                inputs=deep_inputs,
                num_outputs=n_units,
                weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg),
                scope="mlp%d" % i)
            # Order matters: batch norm first, dropout afterwards.
            deep_inputs = batch_norm_layer(deep_inputs,
                                           train_phase=is_training,
                                           scope_bn="bn%d" % i)
            if is_training:
                deep_inputs = tf.nn.dropout(deep_inputs, keep_prob=dropout[i])

        # Single linear output unit, flattened to one value per example.
        y_deep = tf.contrib.layers.fully_connected(
            inputs=deep_inputs,
            num_outputs=1,
            activation_fn=tf.identity,
            weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg),
            scope="deep_out")
        y_d = tf.reshape(y_deep, shape=[-1])

    with tf.variable_scope("NFM-out"):
        y_bias = Global_Bias * tf.ones_like(y_d, dtype=tf.float32)
        y = y_bias + y_linear + y_d
        pred = tf.sigmoid(y)

    predictions = {"prob": pred}
    serving_key = (
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY)
    export_outputs = {
        serving_key: tf.estimator.export.PredictOutput(predictions)
    }

    # PREDICT needs neither labels nor a loss.
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions,
                                          export_outputs=export_outputs)

    # ------ loss ------
    data_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=y, labels=labels))
    linear_penalty = l2_reg * tf.nn.l2_loss(Feat_Wgts)
    embedding_penalty = l2_reg * tf.nn.l2_loss(Feat_Emb)
    loss = data_loss + linear_penalty + embedding_penalty

    # The metric is built before the EVAL check, i.e. in TRAIN mode as well.
    eval_metric_ops = {"auc": tf.metrics.auc(labels, pred)}
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions,
                                          loss=loss,
                                          eval_metric_ops=eval_metric_ops)

    # ------ optimizer ------
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                       beta1=0.9,
                                       beta2=0.999,
                                       epsilon=1e-8)
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())

    if is_training:
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions,
                                          loss=loss,
                                          train_op=train_op)
Example #59
0
optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
inference.initialize(optimizer=optimizer)

init = tf.initialize_all_variables()
init.run()

# Build the visualization op once.  Creating it inside the epoch loop would
# add a fresh sigmoid node to the graph on every epoch.
imgs_op = tf.sigmoid(logits)

n_epoch = 100
n_iter_per_epoch = 1000
for epoch in range(n_epoch):
    avg_loss = 0.0

    widgets = ["epoch #%d|" % epoch, Percentage(), Bar(), ETA()]
    pbar = ProgressBar(n_iter_per_epoch, widgets=widgets)
    pbar.start()
    for t in range(n_iter_per_epoch):
        pbar.update(t)
        x_train, _ = mnist.train.next_batch(M)
        info_dict = inference.update(feed_dict={x_ph: x_train})
        avg_loss += info_dict['loss']

    # Print a lower bound to the average marginal likelihood for an
    # image.  The summed loss is averaged over iterations, then over the
    # M examples of a batch.
    avg_loss = avg_loss / n_iter_per_epoch
    avg_loss = avg_loss / M
    print("log p(x) >= {:0.3f}".format(avg_loss))

    # Visualize hidden representations.
    imgs = imgs_op.eval()
    for m in range(M):
        # Format the file name before joining so that a '%' in IMG_DIR
        # cannot be misread as a format specifier.
        imsave(os.path.join(IMG_DIR, '%d.png' % m), imgs[m].reshape(28, 28))
Example #60
0
])

loss_func = tf.keras.losses.BinaryCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam()
for e in range(10):
    accuracy = []
    for batch, (text, label) in enumerate(train_data.take(-1)):
        with tf.GradientTape() as tape:
            logits = model(text)
            label = tf.expand_dims(label, 1)
            loss = loss_func(label, logits)

        gradients = tape.gradient(loss, model.trainable_variables)
        grads_and_vars = zip(gradients, model.trainable_variables)
        optimizer.apply_gradients(grads_and_vars)
        predictions = tf.cast(tf.math.greater(tf.sigmoid(logits), 0.5),
                              tf.int64)
        accuracy.extend(
            tf.cast(tf.equal(predictions, label), tf.int64).numpy())

        if batch % 100 == 0:
            print('\nEpoch: {} - Batch: {}'.format(e, batch))
            print('Loss: {:.4f}'.format(loss.numpy()))
            print('Accuracy: {}'.format(np.mean(accuracy)))

            for _, (text, label) in enumerate(test_data.take(1)):
                logits = model(text)

                random_id = np.random.choice(label.shape[0], 5)
                for ix in random_id:
                    print('\n')