Example #1
0
  def compute_accuracy(x, l, mask):
    """Compute model accuracy over `FLAGS.eval_steps` batches.

    Predictions are the argmax over the last axis of the squeezed output of
    `ch_model.get_probs(x)`. When `FLAGS.surrogate_attack` is set, the same is
    done with `sur_ch_model` and both accuracy metrics are updated together.

    Args:
      x: input passed to `get_probs`.
      l: labels; their dtype is used as the argmax output type.
      mask: weights forwarded to `tf.metrics.accuracy`.

    Returns:
      The value fetched from the accuracy update op on the last batch: one
      accuracy, or a two-element sequence (model, surrogate) when
      `FLAGS.surrogate_attack` is set.

    Raises:
      ValueError: if `FLAGS.eval_steps` is smaller than 1, because no batch
        would be evaluated and there would be no accuracy to report.
    """
    if FLAGS.eval_steps < 1:
      raise ValueError(
          "FLAGS.eval_steps must be >= 1, got %r" % (FLAGS.eval_steps,))

    preds = ch_model.get_probs(x)
    preds = tf.squeeze(preds)
    preds = tf.argmax(preds, -1, output_type=l.dtype)

    # Only the update op is kept; running it returns the running accuracy.
    _, acc_update_op = tf.metrics.accuracy(l, preds, weights=mask)

    if FLAGS.surrogate_attack:
      preds = sur_ch_model.get_probs(x)
      preds = tf.squeeze(preds)
      preds = tf.argmax(preds, -1, output_type=l.dtype)
      acc_update_op = tf.tuple((acc_update_op,
                                tf.metrics.accuracy(l, preds, weights=mask)[1]))

    # The metric accumulators are local variables; reset them before the run.
    # `tf.local_variables_initializer` replaces the deprecated
    # `tf.initialize_local_variables`.
    sess.run(tf.local_variables_initializer())
    for i in range(FLAGS.eval_steps):
      tf.logging.info(
          "\tEvaluating batch [%d / %d]" % (i + 1, FLAGS.eval_steps))
      acc = sess.run(acc_update_op)
    if FLAGS.surrogate_attack:
      tf.logging.info("\tFinal acc: (%.4f, %.4f)" % (acc[0], acc[1]))
    else:
      tf.logging.info("\tFinal acc: %.4f" % acc)
    return acc
Example #2
0
def precision_recall(num_gbboxes, num_detections, tp, fp, scores,
                     dtype=tf.float64, scope=None):
    """Build precision and recall tensors from scored detections.

    When `scores` is a dict, the function recurses once per key of
    `num_gbboxes` (indexing every argument with that key) and returns two
    dicts, `(precision, recall)`, keyed the same way. Each recursive call runs
    under the scope name 'precision_recall_<key>'.

    Otherwise the `num_detections` highest scores are selected in descending
    order, `tp` and `fp` are reordered accordingly, cast to `dtype` and
    cumulatively summed along axis 0. Recall is the cumulative TP divided by
    `num_gbboxes`; precision is the cumulative TP divided by the cumulative
    TP + FP. Both divisions go through `_safe_div`.

    Returns:
        `tf.tuple([precision, recall])`, or a pair of dicts for dict inputs.
    """
    # Dict inputs: handle each entry separately and collect the results.
    if isinstance(scores, dict):
        precisions, recalls = {}, {}
        for key in num_gbboxes:
            prec, rec = precision_recall(
                num_gbboxes[key], num_detections[key], tp[key], fp[key],
                scores[key], dtype, 'precision_recall_%s' % key)
            precisions[key] = prec
            recalls[key] = rec
        return precisions, recalls

    with tf.name_scope(scope, 'precision_recall',
                       [num_gbboxes, num_detections, tp, fp, scores]):
        # Rank detections from the highest to the lowest score.
        _, order = tf.nn.top_k(scores, k=num_detections, sorted=True)
        ranked_tp = tf.gather(tp, order)
        ranked_fp = tf.gather(fp, order)
        # Running counts of true / false positives along the ranking.
        cum_tp = tf.cumsum(tf.cast(ranked_tp, dtype), axis=0)
        cum_fp = tf.cumsum(tf.cast(ranked_fp, dtype), axis=0)
        total_gt = tf.cast(num_gbboxes, dtype)
        recall = _safe_div(cum_tp, total_gt, 'recall')
        precision = _safe_div(cum_tp, cum_tp + cum_fp, 'precision')
        return tf.tuple([precision, recall])
Example #3
0
def _rev_layer_forward(xs, f, g, f_side_input, g_side_input,
                       gate_outputs=False):
  """Forward for 1 reversible layer."""
  x1, x2 = xs
  y1 = x1 + (f(x2, f_side_input) if f_side_input else f(x2))
  y2 = x2 + (g(y1, g_side_input) if g_side_input else g(y1))
  if gate_outputs:
    return tf.tuple([y1, y2])
  else:
    return (y1, y2)
  def testAcceptTensorsAsControlInputs(self):
    """Check that `tf.tuple` accepts a tensor in `control_inputs`.

    The assign's output tensor is passed as a control input; evaluating the
    tuple's output must therefore run the assign and leave `var` equal to 1.
    """
    with self.test_session():
      var = tf.Variable(0)
      assign = tf.assign(var, 1)
      t, = tf.tuple([tf.constant(0)], control_inputs=[assign])

      # Should trigger the assign.
      t.eval()

      # `assertEqual` replaces the deprecated `assertEquals` alias.
      self.assertEqual(1, var.eval())
Example #5
0
def precision_recall(num_gbboxes, tp, fp, scope=None):
    """Build overall precision and recall from true/false positive flags.

    `tp` and `fp` are cast to float32 and summed along axis 0. Recall is the
    TP total divided by `num_gbboxes` (cast to float32); precision is the TP
    total divided by TP + FP. Both divisions go through
    `tfe_math.safe_divide`.

    Returns:
        `tf.tuple([precision, recall])`.
    """
    with tf.name_scope(scope, 'precision_recall'):
        # Total counts of true and false positives.
        true_pos = tf.reduce_sum(tf.cast(tp, tf.float32), axis=0)
        false_pos = tf.reduce_sum(tf.cast(fp, tf.float32), axis=0)

        n_groundtruth = tf.cast(num_gbboxes, tf.float32)
        recall = tfe_math.safe_divide(true_pos, n_groundtruth, 'recall')

        n_flagged = true_pos + false_pos
        precision = tfe_math.safe_divide(true_pos, n_flagged, 'precision')
        return tf.tuple([precision, recall])
Example #6
0
    def __init__(self, x, size, selectTrain, sess, toTarget=None, ts=0.001):
        """Build a batch-normalisation layer over `x`.

        Args:
            x: input tensor; its moments are taken over axis 0 and it is
                reshaped to [-1, 1, 1, size] for normalisation.
            size: feature count; sets the shape of the moving statistics and
                of the `beta` / `gamma` variables.
            selectTrain: predicate given to `tf.cond`. When true, the batch
                statistics are used; otherwise the moving-average variables.
            sess: stored as `self.sess`.
            toTarget: optional object exposing `beta` and `gamma`. When
                given, ops that move this layer's `beta` / `gamma` toward
                them are created (`self.updateTarget`).
            ts: interpolation weight of the tracking update.
        """
        self.sess = sess
        self.mean_x_train, self.variance_x_train = moments(x, [0])

        self.mean_x_ma = tf.Variable(tf.zeros([size]))
        self.variance_x_ma = tf.Variable(tf.ones([size]))

        # Exponential moving averages with decay 0.95. The variance assign is
        # the value of `self.update`; the mean assign is attached as a
        # control input so that running `self.update` performs both.
        variance_assign = self.variance_x_ma.assign(
            0.95*self.variance_x_ma + 0.05*self.variance_x_train)
        mean_assign = self.mean_x_ma.assign(
            0.95*self.mean_x_ma + 0.05*self.mean_x_train)
        self.update = tf.tuple([variance_assign],
                               control_inputs=[mean_assign])[0]

        self.mean_x_ma_update = tf.tuple(
            [self.mean_x_train], control_inputs=[])[0]
        self.printUp = tf.Print(self.mean_x_ma_update, [selectTrain],
                                message="selectTrain value : ")
        self.variance_x_ma_update = tf.tuple(
            [self.variance_x_train], control_inputs=[])[0]

        # Branch functions for tf.cond: batch statistics vs. stored averages.
        def getxmau(): return self.mean_x_ma_update
        def getxma(): return self.mean_x_ma

        def getvxmau(): return self.variance_x_ma_update
        def getvxma(): return self.variance_x_ma

        self.mean_x = tf.cond(selectTrain, getxmau, getxma)
        self.variance_x = tf.cond(selectTrain, getvxmau, getvxma)

        self.beta = tf.Variable(tf.zeros([size]))
        self.gamma = tf.Variable(tf.ones([size]))

        # The op works on 4-D input, so x is viewed as [-1, 1, 1, size] and
        # the result is flattened back to [-1, size]. 0.01 is the variance
        # epsilon; True enables scaling by gamma after normalisation.
        x_4d = tf.reshape(x, [-1, 1, 1, size])
        normed = tf.nn.batch_norm_with_global_normalization(
            x_4d, self.mean_x, self.variance_x, self.beta, self.gamma,
            0.01, True)
        self.xNorm = tf.reshape(normed, [-1, size])

        # Identity check instead of `!= None`: no reliance on the object's
        # comparison operators.
        if toTarget is not None:
            self.isTracking = toTarget
            self.updateBeta = self.beta.assign(
                self.beta*(1-ts)+self.isTracking.beta*ts)
            self.updateGamma = self.gamma.assign(
                self.gamma*(1-ts)+self.isTracking.gamma*ts)
            self.updateTarget = tf.group(self.updateBeta, self.updateGamma)
Example #7
0
	def create_graph (self):
		"""Build the two-input network and store its softmax in `self.readout`.

		`self.inputs_a` and `self.inputs_b` are each reshaped to [-1, 4, 4, 1]
		and passed through the same two convolution + ReLU layers (weights are
		shared between the two inputs). The flattened results are concatenated
		and fed to two fully connected layers with 1024 and 2 outputs.
		"""
		def weight_variable (shape):
			# weights start from a truncated normal with stddev 0.1
			return tf.Variable (tf.truncated_normal (shape, stddev=0.1))

		def bias_variable (shape):
			# biases start at the constant 0.1
			return tf.Variable (tf.constant (0.1, shape=shape))

		def conv_valid (images, kernel, bias):
			# stride-1 convolution without padding, followed by the bias add
			return tf.nn.conv2d (images, filter=kernel, strides=[1, 1, 1, 1], padding='VALID') + bias

		board_a = tf.reshape (self.inputs_a, [-1, 4, 4, 1])
		board_b = tf.reshape (self.inputs_b, [-1, 4, 4, 1])

		# first layer: 2x2 kernels, 1 -> 16 channels
		kernel_1 = weight_variable ([2, 2, 1, 16])
		offset_1 = bias_variable ([16])
		pre_1_a = conv_valid (board_a, kernel_1, offset_1)
		pre_1_b = conv_valid (board_b, kernel_1, offset_1)
		act_1_a = tf.nn.relu (pre_1_a)
		act_1_b = tf.nn.relu (pre_1_b)

		# second layer: 2x2 kernels, 16 -> 32 channels
		kernel_2 = weight_variable ([2, 2, 16, 32])
		offset_2 = bias_variable ([32])
		pre_2_a = conv_valid (act_1_a, kernel_2, offset_2)
		pre_2_b = conv_valid (act_1_b, kernel_2, offset_2)
		act_2_a = tf.nn.relu (pre_2_a)
		act_2_b = tf.nn.relu (pre_2_b)

		# two VALID 2x2 convolutions shrink 4x4 to 2x2, with 32 channels
		flat_size = 2 * 2 * 32
		flat_a = tf.reshape (act_2_a, [-1, flat_size])
		flat_b = tf.reshape (act_2_b, [-1, flat_size])

		# NOTE(review): positional (axis, values) order is the pre-1.0
		# tf.concat signature -- confirm the TensorFlow version in use.
		merged = tf.concat (1, [flat_a, flat_b])
		merged_synced = tf.tuple ([merged]) [0]

		hidden_w = weight_variable ([flat_size * 2, 1024])
		hidden_b = bias_variable ([1024])
		hidden = tf.matmul (merged_synced, hidden_w) + hidden_b

		out_w = weight_variable ([1024, 2])
		out_b = bias_variable ([2])
		logits = tf.matmul (hidden, out_w) + out_b

		self.readout = tf.nn.softmax (logits)
Example #8
0
def _rev_layer_backward(ys, grad_ys, f, g, f_vars, f_side_input, g_vars,
                        g_side_input):
  """Backprop for 1 layer.

  Rebuilds the layer inputs from its outputs (`x2 = y2 - g(y1)`,
  `x1 = y1 - f(x2)`) and builds the gradients with respect to those inputs,
  to the variables of `f` and `g`, and to their side inputs.

  Args:
    ys: pair `(y1, y2)` of layer outputs.
    grad_ys: pair `(grad_y1, grad_y2)` of gradients w.r.t. the outputs.
    f: function applied to `x2`; called with the side inputs as a second
      argument when `f_side_input` is non-empty.
    g: function applied to `y1`; same calling convention as `f`.
    f_vars: list of variables of `f` (concatenated with `f_side_input`).
    f_side_input: iterable of side-input tensors for `f`.
    g_vars: list of variables of `g` (concatenated with `g_side_input`).
    g_side_input: iterable of side-input tensors for `g`.

  Returns:
    A nested structure `((x1, x2), (grad_x1, grad_x2),
    (grad_f_vars, grad_f_side), (grad_g_vars, grad_g_side))` whose leaves
    have all been passed through a single `tf.tuple`.
  """
  y1, y2 = ys
  grad_y1, grad_y2 = grad_ys

  # Reconstruct intermediates and inputs (x1, x2)
  # stop_gradients required on fn inputs to prevent infinite recursion into this
  # grad function on the calls to tf.gradients.
  y1_stop = tf.stop_gradient(y1)
  g_side_input = [tf.stop_gradient(t) for t in g_side_input]
  gy1 = g(y1_stop, g_side_input) if g_side_input else g(y1_stop)

  x2 = y2 - gy1
  x2_stop = tf.stop_gradient(x2)
  f_side_input = [tf.stop_gradient(t) for t in f_side_input]
  fx2 = f(x2_stop, f_side_input) if f_side_input else f(x2_stop)

  x1 = y1 - fx2

  # Compute gradients wrt to inputs
  # dL/dy2 * dG(y1)/y1
  grad_gy1_y2 = tf.gradients(gy1, y1_stop, grad_y2)[0]
  grad_x1 = grad_y1 + grad_gy1_y2
  # grad_x2 sums three terms: grad_y1 pushed back through f, grad_y2 itself,
  # and grad_gy1_y2 pushed back through f.
  grad_x2 = (
      tf.gradients(fx2, x2_stop, grad_y1)[0] + grad_y2 +
      tf.gradients(fx2, x2_stop, grad_gy1_y2)[0])

  # Compute gradients wrt to vars and side inputs in f and g
  # Each tf.gradients call covers vars and side inputs at once; the result is
  # split at len(vars).
  grads1 = tf.gradients(gy1, g_vars + g_side_input, grad_y2)
  grad_g_vars, grad_g_side = grads1[:len(g_vars)], grads1[len(g_vars):]
  grads2 = tf.gradients(fx2, f_vars + f_side_input, grad_y1)
  grad_f_y1, grad_f_side1 = grads2[:len(f_vars)], grads2[len(f_vars):]
  grads3 = tf.gradients(fx2, f_vars + f_side_input, grad_gy1_y2)
  grad_f_y2, grad_f_side2 = grads3[:len(f_vars)], grads3[len(f_vars):]
  # f receives two upstream gradients (grad_y1 and grad_gy1_y2); the two
  # contributions are combined by _acc_grads.
  grad_f_vars = _acc_grads(grad_f_y1, grad_f_y2)

  grad_f_side = _acc_grads(grad_f_side1, grad_f_side2)

  # Put returns in a tuple to ensure a constant memory budget (i.e. don't want
  # the subsequent layer to start computing and consuming memory based on a
  # subset of these values).
  outputs = ((x1, x2), (grad_x1, grad_x2), (grad_f_vars, grad_f_side),
             (grad_g_vars, grad_g_side))
  # Flatten, gate through tf.tuple, then restore the nested structure.
  tupled = tf.tuple(tf.contrib.framework.nest.flatten(outputs))
  return tf.contrib.framework.nest.pack_sequence_as(outputs, tupled)
Example #9
0
def _precision_recall(n_gbboxes, n_detections, scores, tp, fp, scope=None):
    """Build precision and recall tensors from scored detections.

    The `n_detections` highest scores are selected in descending order and
    `tp` / `fp` are reordered to match. Both are cast to float64 and
    cumulatively summed along axis 0. Recall is the cumulative TP divided by
    `n_gbboxes`; precision is the cumulative TP divided by the cumulative
    TP + FP. Both divisions go through `_safe_div`.

    Returns:
        `tf.tuple([precision, recall])`.
    """
    with tf.name_scope(scope, 'prec_rec', [n_gbboxes, scores, tp, fp]):
        # Rank detections from the highest to the lowest score.
        _, order = tf.nn.top_k(scores, k=n_detections, sorted=True)
        ranked_tp = tf.gather(tp, order)
        ranked_fp = tf.gather(fp, order)

        # Running counts along the ranking, in double precision.
        float_type = tf.float64
        cum_tp = tf.cumsum(tf.cast(ranked_tp, float_type), axis=0)
        cum_fp = tf.cumsum(tf.cast(ranked_fp, float_type), axis=0)

        total_gt = tf.cast(n_gbboxes, float_type)
        recall = _safe_div(cum_tp, total_gt, 'recall')
        precision = _safe_div(cum_tp, cum_tp + cum_fp, 'precision')

        return tf.tuple([precision, recall])
Example #10
0
    def _get_step(self, inputs):
        """Build the ops of one iteration step.

        The working tensors are unpacked from `self.inputs` (the `inputs`
        argument itself is not read). The step computes
        `Zk = soft_thresholding(Y.S + X.We, lmbd/L)`,
        `tk = (1 + sqrt(1 + 4*theta^2)) / 2` and
        `Yk = Zk + (theta - 1)/tk * (Zk - Z)`.

        Side effects: sets `self.S`, `self.We`, `self.step_FISTA`,
        `self.theta_k`, `self.Yk` and `self.dz`.

        Returns:
            `(step, self.dz)` where `step` is `tf.tuple([Zk, tk, Yk])` and
            `self.dz` is the mean over rows of the squared norm of `Zk - Z`.
        """
        Z, Y, X, theta, lmbd = self.inputs
        rows, cols = self.D.shape
        L = self.L
        with tf.name_scope("ISTA_iteration"):
            # Constant matrices of the linear part of the update.
            s_value = np.eye(rows, dtype=np.float32) - self.S0/L
            self.S = tf.constant(s_value, shape=[rows, rows], name='S')
            we_value = self.D.T/L
            self.We = tf.constant(we_value, shape=[cols, rows],
                                  dtype=tf.float32, name='We')

            pre_threshold = tf.matmul(Y, self.S) + tf.matmul(X, self.We)
            z_next = soft_thresholding(pre_threshold, lmbd/L)
            self.step_FISTA = z_next

            theta_next = (1 + tf.sqrt(1 + 4*theta*theta))/2
            self.theta_k = theta_next

            # Momentum term built from the change between iterates.
            delta = tf.subtract(z_next, Z)
            self.Yk = z_next + (theta-1)/theta_next*delta

            row_sq_norms = tf.reduce_sum(delta*delta, reduction_indices=[1])
            self.dz = tf.reduce_mean(row_sq_norms)

            step = tf.tuple([z_next, theta_next, self.Yk])
        return step, self.dz
Example #11
0
def precision_recall_values(xvals, precision, recall, name=None):
    """Compute values on the precision/recall curve.

    `precision` is padded with 0 on both ends and `recall` with 0 and 1, then
    the padded precision is replaced by its reverse cumulative maximum. For
    each `x` in `xvals`, the value is the minimum of that curve over the
    entries whose recall is less than or equal to `x`.

    Args:
      xvals: Python list of floats;
      precision: 1D Tensor decreasing.
      recall: 1D Tensor increasing.
    Return:
      list of precision values, gated through `tf.tuple`.
    """
    with ops.name_scope(name, "precision_recall_values",
                        [precision, recall]) as name:
        # Add bounds values to precision and recall.
        padded_precision = tf.concat([[0.], precision, [0.]], axis=0)
        padded_recall = tf.concat([[0.], recall, [1.]], axis=0)
        envelope = tfe_math.cummax(padded_precision, reverse=True)

        def value_at(threshold):
            # Keep only the curve points at or below the recall threshold.
            keep = tf.less_equal(padded_recall, threshold)
            return tf.reduce_min(tf.boolean_mask(envelope, keep))

        return tf.tuple([value_at(x) for x in xvals])
Example #12
0
def build_train_MSR_face_graph_multi_gpu(batch_size=64,
                                         num_gpus=4,
                                         latent_dims=1024,
                                         lr_g=1e-4,
                                         lr_c=5e-5,
                                         clamp_lower=-0.01,
                                         clamp_upper=0.01,
                                         use_gradient_penalty=True,
                                         stddev=0.02,
                                         norm_val=10):
    """Build a multi-GPU generator/critic training graph.

    One tower is built per GPU. Each tower draws its own latent batch, runs
    the generator and the critic, and computes per-tower gradients; the
    gradients are then averaged across towers and applied with two RMSProp
    optimizers.

    Args:
        batch_size: per-tower batch size.
        num_gpus: number of towers ('/gpu:0' .. '/gpu:<num_gpus-1>').
        latent_dims: size of the uniform(-1, 1) latent vector.
        lr_g: generator learning rate.
        lr_c: critic learning rate.
        clamp_lower: lower clip bound for critic variables (only used when
            `use_gradient_penalty` is false).
        clamp_upper: upper clip bound for critic variables (same condition).
        use_gradient_penalty: add a gradient-norm penalty to the critic loss
            instead of clipping the critic variables.
        stddev: standard deviation of the weight initializers.
        norm_val: multiplier of the gradient penalty term.

    Returns:
        `(apply_gradient_g, apply_gradient_c, total_c_loss, phase)`.
        `apply_gradient_c` is the critic's apply-gradients op, or, without
        gradient penalty, the list of clipped-variable tensors returned by
        `tf.tuple`. `phase` is the boolean placeholder passed to the
        generator as `for_train`.
    """
    from data_utils import get_cartoon_faces
    with tf.device('/cpu:0'):
        phase = tf.placeholder(tf.bool)

        opt_g = tf.train.RMSPropOptimizer(lr_g)
        opt_c = tf.train.RMSPropOptimizer(lr_c)
        real_img = get_cartoon_faces(batch_size)
        batch_queue = tf.contrib.slim.prefetch_queue.prefetch_queue(
            [real_img], capacity=2 * num_gpus)

        tower_grads_c = []
        tower_grads_g = []
        tower_c_losses = []
        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(num_gpus):
                image_batch = batch_queue.dequeue()
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('%s_%d' % ('tower', i)):
                        z = tf.random_uniform([batch_size, latent_dims], -1, 1)

                        with tf.variable_scope('generator'):
                            generate_img = generator(
                                z, [4, 4, 1024], [64, 64, 3],
                                tf.tanh,
                                tf.random_normal_initializer(stddev=stddev),
                                kernel_size=5,
                                for_train=phase)

                            tf.get_variable_scope().reuse_variables()

                        # The critic creates its variables on the first
                        # tower only; every later call reuses them.
                        fake_logit = critic(
                            generate_img,
                            64,
                            4,
                            tf.truncated_normal_initializer(stddev=stddev),
                            kernel_size=5,
                            reuse=(i >= 1))
                        true_logit = critic(
                            image_batch,
                            64,
                            4,
                            tf.truncated_normal_initializer(stddev=stddev),
                            kernel_size=5,
                            reuse=True)

                        # Share all variables with the following towers.
                        tf.get_variable_scope().reuse_variables()

                        theta_g = tf.get_collection(
                            tf.GraphKeys.TRAINABLE_VARIABLES,
                            scope='generator')
                        theta_c = tf.get_collection(
                            tf.GraphKeys.TRAINABLE_VARIABLES, scope='critic')

                        c_loss = tf.reduce_mean(fake_logit - true_logit,
                                                name='c_loss')
                        g_loss = tf.reduce_mean(-fake_logit, name='g_loss')

                        if use_gradient_penalty:
                            # Penalise the critic's gradient norm at random
                            # interpolations between generated and real
                            # images for deviating from 1.
                            alpha = tf.random_uniform(
                                shape=[batch_size, 1, 1, 1],
                                minval=0.,
                                maxval=1.)
                            x_hat = generate_img * alpha + (
                                1.0 - alpha) * image_batch
                            d_hat = critic(
                                x_hat, 64, 4,
                                tf.truncated_normal_initializer(stddev=stddev),
                                5, True)
                            gradients = tf.gradients(d_hat, [x_hat])[0]
                            ddx = tf.sqrt(
                                tf.reduce_sum(tf.square(gradients),
                                              axis=[1, 2, 3]))
                            ddx = tf.reduce_mean(
                                tf.square(ddx - tf.constant(1, tf.float32))
                            ) * tf.constant(norm_val, tf.float32)
                            c_loss = c_loss + ddx

                        tower_c_losses.append(c_loss)

                        tower_grads_c.append(
                            opt_c.compute_gradients(c_loss, var_list=theta_c))
                        tower_grads_g.append(
                            opt_g.compute_gradients(g_loss, var_list=theta_g))

        average_grads_c = average_gradients(tower_grads_c)
        average_grads_g = average_gradients(tower_grads_g)

        total_c_loss = average_loss(tower_c_losses)

        tf.summary.scalar("critic_loss", total_c_loss)

        # Image summaries use the output of the last tower built.
        tf.summary.image('img', generate_img, max_outputs=10)
        variable_summaries(generate_img, 'generated_img')

        apply_gradient_c = opt_c.apply_gradients(average_grads_c)

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        # add dependency of updating moving statistics of batch normalization
        with tf.control_dependencies(update_ops):
            apply_gradient_g = opt_g.apply_gradients(average_grads_g)

        if not use_gradient_penalty:
            theta_c = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                        scope='critic')
            clipped_var_c = [
                tf.assign(var, tf.clip_by_value(var, clamp_lower, clamp_upper))
                for var in theta_c
            ]
            # merge the clip operations on critic variables
            with tf.control_dependencies([apply_gradient_c]):
                apply_gradient_c = tf.tuple(clipped_var_c)

    return apply_gradient_g, apply_gradient_c, total_c_loss, phase
Example #13
0
 def get_state_update_op(state_variables, new_states):
     """Build an op that stores `new_states` into `state_variables`.

     Both arguments are iterated in lockstep; for each pair, elements 0 and 1
     of the state variable are assigned elements 0 and 1 of the new state.

     Returns:
         `tf.tuple` over all assign ops, so that fetching the result runs
         every assignment.
     """
     update_ops = []
     for state_variable, new_state in zip(state_variables, new_states):
         # `.assign` is required here: `==` only compares and writes nothing.
         update_ops.extend([state_variable[0].assign(new_state[0]),
                            state_variable[1].assign(new_state[1])])
     return tf.tuple(update_ops)
Example #14
0
    def __call__(self, dataset, moving_params=None):
        """Build the graph for `dataset` and return a dict of its tensors.

        Inputs are embedded, passed through `self.n_recur` recurrent layers
        and an MLP whose two halves are each split at `self.attn_mlp_size`
        into an arc part and a relation part. Variables are reused whenever
        `moving_params` is given. Without `moving_params` the relation
        classifier is conditioned on `targets[:, :, 1]`; otherwise on the arc
        classifier's own predictions.

        Side effects: sets `self.tokens_to_keep3D`, `self.sequence_lengths`,
        `self.n_tokens` and `self.moving_params`.

        Returns:
            dict with the combined probabilities, predictions, correctness,
            accuracy and loss, plus the intermediate tensors.
        """
        vocabs = dataset.vocabs
        inputs = dataset.inputs
        targets = dataset.targets

        reuse = moving_params is not None

        # Mask of positions whose first input column is greater than ROOT.
        above_root = tf.greater(inputs[:, :, 0], vocabs[0].ROOT)
        self.tokens_to_keep3D = tf.expand_dims(tf.to_float(above_root), 2)
        kept_per_row = tf.reduce_sum(self.tokens_to_keep3D, [1, 2])
        self.sequence_lengths = tf.reshape(kept_per_row, [-1, 1])
        self.n_tokens = tf.reduce_sum(self.sequence_lengths)
        self.moving_params = moving_params

        word_inputs, pret_inputs = vocabs[0].embedding_lookup(
            inputs[:, :, 0], inputs[:, :, 1], moving_params=self.moving_params)

        embed_inputs = self.embed_concat(word_inputs + pret_inputs)
        top_recur = embed_inputs
        for layer in xrange(self.n_recur):
            with tf.variable_scope('RNN%d' % layer, reuse=reuse):
                top_recur, _ = self.RNN(top_recur)

        with tf.variable_scope('MLP', reuse=reuse):
            dep_mlp, head_mlp = self.MLP(
                top_recur,
                self.class_mlp_size + self.attn_mlp_size,
                n_splits=2)
            # The first attn_mlp_size features feed the arc classifier, the
            # remainder feeds the relation classifier.
            dep_arc_mlp = dep_mlp[:, :, :self.attn_mlp_size]
            dep_rel_mlp = dep_mlp[:, :, self.attn_mlp_size:]
            head_arc_mlp = head_mlp[:, :, :self.attn_mlp_size]
            head_rel_mlp = head_mlp[:, :, self.attn_mlp_size:]

        with tf.variable_scope('Arcs', reuse=reuse):
            arc_logits = self.bilinear_classifier(dep_arc_mlp, head_arc_mlp)
            arc_output = self.output(arc_logits, targets[:, :, 1])
            predictions = (targets[:, :, 1] if moving_params is None
                           else arc_output['predictions'])

        with tf.variable_scope('Rels', reuse=reuse):
            rel_logits, rel_logits_cond = self.conditional_bilinear_classifier(
                dep_rel_mlp, head_rel_mlp, len(vocabs[2]), predictions)
            rel_output = self.output(rel_logits, targets[:, :, 2])
            rel_output['probabilities'] = self.conditional_probabilities(
                rel_logits_cond)

        joint_probabilities = tf.tuple(
            [arc_output['probabilities'], rel_output['probabilities']])
        joint_predictions = tf.pack(
            [arc_output['predictions'], rel_output['predictions']])
        # A token counts as correct only if both arc and relation are.
        correct = arc_output['correct'] * rel_output['correct']
        n_correct = tf.reduce_sum(correct)
        accuracy = n_correct / self.n_tokens
        loss = arc_output['loss'] + rel_output['loss']
        if self.word_l2_reg > 0:
            # NOTE(review): `word_loss` is not defined anywhere in this
            # method as shown -- confirm where it is meant to come from.
            loss += word_loss

        return {
            'probabilities': joint_probabilities,
            'predictions': joint_predictions,
            'correct': correct,
            'tokens': arc_output['tokens'],
            'n_correct': n_correct,
            'n_tokens': self.n_tokens,
            'accuracy': accuracy,
            'loss': loss,
            'embed': embed_inputs,
            'recur': top_recur,
            'dep_arc': dep_arc_mlp,
            'head_dep': head_arc_mlp,
            'dep_rel': dep_rel_mlp,
            'head_rel': head_rel_mlp,
            'arc_logits': arc_logits,
            'rel_logits': rel_logits,
        }
Example #15
0
    def gridlstm_def(self, rnn_input, seq_len):
        """Build a GridLSTM layer whose state is kept in a variable.

        Args:
            rnn_input: input tensor passed to `tf.nn.dynamic_rnn`.
            seq_len: sequence lengths passed to `tf.nn.dynamic_rnn`.

        Returns:
            `(rnn_outputs, gridrnn_keep_state_op, gridrnn_state_zero_op)`.
            `rnn_outputs` is transposed with permutation [1, 0, 2] when
            `self.time_major` is false. The two ops are `tf.tuple`s of
            assigns that respectively store the final RNN state into the
            state variable and reset it to zeros; their values should not
            be used.
        """
        with tf.variable_scope('GridLSTM'):
            def gridlstm_cell():
                return tf.contrib.rnn.GridLSTMCell(self.grid_num_units,
                        use_peepholes=self.use_peepholes,
                        feature_size=self.grid_feature_size,
                        frequency_skip=self.grid_frequency_skip,
                        num_frequency_blocks=[self.num_frequency_blocks],
                        state_is_tuple=self.state_is_tuple,
                        reuse=tf.get_variable_scope().reuse)
        # NOTE: the factory is called after the 'GridLSTM' variable scope has
        # been exited, so `tf.get_variable_scope().reuse` is read from the
        # enclosing scope.
        cell = gridlstm_cell()

        # A single non-trainable variable backs every entry of the initial
        # state tuple.
        state_value = tf.Variable(
                np.zeros((self.batch_size, self.grid_num_units),dtype=np.float32),
                trainable=False,
                dtype=tf.float32)
        gridrnn_tuple_state = cell.state_tuple_type(
                *([state_value,state_value] * self.num_frequency_blocks))

        # Build the RNN
        with tf.name_scope("GridLSTM"):
            rnn_outputs, new_states = tf.nn.dynamic_rnn(cell=cell,
                    inputs=rnn_input,
                    sequence_length=seq_len,
                    initial_state=gridrnn_tuple_state,
                    dtype=tf.float32,
                    time_major=self.time_major)

        update_ops = []
        for state_variable, new_state in zip(gridrnn_tuple_state, new_states):
            # Assign the new state to the state variables on this layer
            update_ops.extend([state_variable[0].assign(new_state[0]),
                state_variable[1].assign(new_state[1])])
        # Return a tuple in order to combine all update_ops into a single operation.
        # The tuple's actual value should not be used.
        gridrnn_keep_state_op = tf.tuple(update_ops)

        # Define an op to reset the hidden state to zeros
        update_ops = []
        for state_variable in gridrnn_tuple_state:
            # Overwrite the stored state with zeros of the same shape
            update_ops.extend([state_variable[0].assign(tf.zeros_like(state_variable[0])),
                state_variable[1].assign(tf.zeros_like(state_variable[1]))])
        # Return a tuple in order to combine all update_ops into a single operation.
        # The tuple's actual value should not be used.
        gridrnn_state_zero_op = tf.tuple(update_ops)

        if not self.time_major:
            rnn_outputs = tf.transpose(rnn_outputs, [1, 0, 2]) # [time, batch_size, cell_outdim]

        return rnn_outputs, gridrnn_keep_state_op, gridrnn_state_zero_op
Example #16
0
	def __init__(self, hidden_size = 75, embedding_size = 300, is_training= True):
		self.start_index = tf.placeholder(tf.int32, [None])                     #[batch_size]
		self.stop_index = tf.placeholder(tf.int32, [None]) 		                #[batch_size]
		#self.dropout_rate = tf.placeholder(tf.int32 , [1])			
		input_dim = 0
		

		with tf.name_scope("word-rep"):
			self.question_repres = tf.placeholder(tf.float32, [None, None, embedding_size])   # [batch_size, question_len, word_dim]
			self.passage_repres = tf.placeholder(tf.float32, [None, None, embedding_size])    # [batch_size, passage_len, word_dim]

			self.question_lengths = get_length(self.question_repres)				#[batch_size]
			self.passage_lengths = get_length(self.passage_repres)					#[batch_size]

			input_shape = tf.shape(self.question_repres)
			batch_size = input_shape[0]
			batch_size = tf.cast(batch_size, tf.int32)

			question_len = input_shape[1]
			input_shape = tf.shape(self.passage_repres)
			passage_len = input_shape[1]
			input_dim += input_shape[2]
		"""
		
			if with_char and char_vocab is not None:
				self.question_char_lengths = tf.placeholder(tf.int32, [None,None]) # [batch_size, question_len]
				self.passage_char_lengths = tf.placeholder(tf.int32, [None,None]) # [batch_size, passage_len]
				self.question_chars = tf.placeholder(tf.int32, [None, None, None]) # [batch_size, question_len, q_char_len]
				self.passage_chars = tf.placeholder(tf.int32, [None, None, None]) # [batch_size, passage_len, p_char_len]
				input_shape = tf.shape(self.question_chars)
				batch_size = input_shape[0]
				question_len = input_shape[1]
				q_char_len = input_shape[2]
				input_shape = tf.shape(self.passage_chars)
				passage_len = input_shape[1]
				p_char_len = input_shape[2]
				char_dim = char_vocab.word_dim
				self.char_embedding = tf.get_variable("char_embedding", initializer=tf.constant(char_vocab.word_vecs), 
					dtype=tf.float32)
				question_char_repres = tf.nn.embedding_lookup(self.char_embedding, self.question_chars) # [batch_size, question_len, q_char_len, char_dim]
				question_char_repres = tf.reshape(question_char_repres, shape=[-1, q_char_len, char_dim])
				question_char_lengths = tf.reshape(self.question_char_lengths, [-1])
				passage_char_repres = tf.nn.embedding_lookup(self.char_embedding, self.passage_chars) # [batch_size, passage_len, p_char_len, char_dim]
				passage_char_repres = tf.reshape(passage_char_repres, shape=[-1, p_char_len, char_dim])
				passage_char_lengths = tf.reshape(self.passage_char_lengths, [-1])
				with tf.variable_scope('char_lstm'):
					# lstm cell
					char_lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(char_lstm_dim)
					# dropout
					if is_training: char_lstm_cell = tf.nn.rnn_cell.DropoutWrapper(char_lstm_cell, 
						output_keep_prob=(1 - dropout_rate))
					char_lstm_cell = tf.nn.rnn_cell.MultiRNNCell([char_lstm_cell])
					# question_representation
					question_char_outputs = my_rnn.dynamic_rnn(char_lstm_cell, question_char_repres, 
							sequence_length=question_char_lengths,dtype=tf.float32)[0] # [batch_size*question_len, q_char_len, char_lstm_dim]
					question_char_outputs = question_char_outputs[:,-1,:]
					question_char_outputs = tf.reshape(question_char_outputs, [batch_size, question_len, char_lstm_dim])
				 
					tf.get_variable_scope().reuse_variables()
					# passage representation
					passage_char_outputs = my_rnn.dynamic_rnn(char_lstm_cell, passage_char_repres, 
							sequence_length=passage_char_lengths,dtype=tf.float32)[0] # [batch_size*question_len, q_char_len, char_lstm_dim]
					passage_char_outputs = passage_char_outputs[:,-1,:]
					passage_char_outputs = tf.reshape(passage_char_outputs, [batch_size, passage_len, char_lstm_dim])
					
				question_repres.append(question_char_outputs)
				passage_repres.append(passage_char_outputs)
				input_dim += char_lstm_dim
		question_repres = tf.concat(2, question_repres) # [batch_size, question_len, dim]
		passage_repres = tf.concat(2, passage_repres) # [batch_size, passage_len, dim]
		"""

		#if is_training:
		#	self.question_repres = tf.nn.dropout(self.question_repres, (1 - self.dropout_rate))
		#	self.passage_repres = tf.nn.dropout(self.passage_repres, (1 - self.dropout_rate))
		#else:
		#	self.question_repres = tf.mul(self.question_repres, (1 - self.dropout_rate))
		#	self.passage_repres = tf.mul(self.passage_repres, (1 - self.dropout_rate))
		
		passage_mask = tf.sequence_mask(self.passage_lengths, passage_len, dtype=tf.float32) # [batch_size, passage_len]
		question_mask = tf.sequence_mask(self.question_lengths, question_len, dtype=tf.float32) # [batch_size, question_len]

		# - sequence length helper function
		def seq_len(seq):
			seq_bool = tf.sign(tf.abs(seq))
			return tf.reduce_sum(seq_bool, axis=-1)

		with tf.name_scope("q-p_encoder"):
			with tf.variable_scope("passage-encoder"):
				#W = tf.Variable(tf.truncated_normal(shape = [], stddev=0.05),name = "w")
				#b = tf.Variable(tf.constant(0.1, shape=[]),name="b")

				fcell = tf.contrib.rnn.BasicLSTMCell(hidden_size)
				bcell = tf.contrib.rnn.BasicLSTMCell(hidden_size)
				u_p,_ = tf.nn.dynamic_rnn(fcell, inputs = self.passage_repres,dtype= tf.float32, sequence_length = self.passage_lengths)
			
			with tf.variable_scope("question-encoder"):
				#W = tf.Variable(tf.truncated_normal(shape, stddev=0.05),name = "w")
				#b = tf.Variable(tf.constant(0.1, shape=[]),name="b")

				fcell = tf.contrib.rnn.BasicLSTMCell(hidden_size)
				bcell = tf.contrib.rnn.BasicLSTMCell(hidden_size)

				u_q,_ = tf.nn.dynamic_rnn(fcell, inputs =self.question_repres,dtype= tf.float32, sequence_length = self.question_lengths)
		
		# i : batch_number , k : question_len_number
		#unstacked_u_q = tf.unstack(u_q, axis = 0,num = 10)
		#unstacked_u_p = tf.unstack(u_p,axis = 0,num = 10)
		#print(unstacked_u_q)
		with tf.name_scope("q-p_attention"):
			lstm_m_cell = tf.contrib.rnn.BasicLSTMCell(num_units=hidden_size)

			def match_attention(k, q_i, p_i, len_q_i, state, batch_tensor):
				"""Run one step of the question-passage matching loop.

				Used as the body of the inner tf.while_loop built in match_sentence.
				Row `k` of `p_i` and the previous LSTM hidden state are scored against
				the first `len_q_i` rows of `q_i`; the normalised scores weight a sum
				of question rows, and the concatenation [p_i[k], weighted sum] is fed
				to `lstm_m_cell`.

				Args:
					k: loop counter; indexes `p_i` and is the write position in `batch_tensor`.
					q_i: question encoding for one batch element
						(assumed [question_len, hidden_size] -- TODO confirm).
					p_i: passage encoding for one batch element.
					len_q_i: number of question rows to attend over.
					state: LSTM state tuple of `lstm_m_cell` from the previous step.
					batch_tensor: TensorArray collecting the per-step cell outputs.

				Returns:
					The loop variables for the next iteration:
					(k + 1, q_i, p_i, len_q_i, next_state, updated batch_tensor).
				"""
				# Current passage row as a [1, dim] matrix so it can be used in matmul.
				p_i_k = tf.reshape(p_i[k], [1, -1])
				# First len_q_i rows (and first hidden_size columns) of the question.
				q_i_k = tf.slice(q_i, begin=[0,0], size=[len_q_i, hidden_size])
				
				with tf.variable_scope('attn_weights'):
					w_s = tf.get_variable(shape=[hidden_size, hidden_size],
										   name='w_s')
					w_t = tf.get_variable(shape=[hidden_size, hidden_size],
										   name='w_t')
					w_m = tf.get_variable(shape=[hidden_size, hidden_size],
										   name='w_m')
					w_e = tf.get_variable(shape=[hidden_size, 1],
										  name='w_e')

				m_lstm_state = tf.reshape(state.h,[1,-1])
				# The two single-row terms broadcast over the len_q_i question rows.
				sum_m = tf.matmul(q_i_k, w_s) + tf.matmul(p_i_k, w_t) + tf.matmul(m_lstm_state, w_m)
				s_k = tf.matmul(tf.tanh(sum_m), w_e)

				# Hand-written softmax over the question positions (exp / sum of exp).
				exps = tf.reshape(tf.exp(s_k), [len_q_i])
				alphas = exps / tf.reshape(tf.reduce_sum(exps, 0), [1])
				# NOTE(review): this multiplies the unsliced q_i, not q_i_k; the shapes
				# only line up when q_i has exactly len_q_i rows -- confirm intent.
				a_k = tf.reduce_sum(q_i* tf.reshape(alphas, [len_q_i, 1]), 0)

				a_k = tf.reshape(a_k, [1,hidden_size])
				m_k = tf.concat([p_i_k , a_k], axis=1)
				with tf.variable_scope('lstm_m_step'):
					out, next_state = lstm_m_cell(inputs=m_k, state=state)
				
				# Store this step's output and advance the loop counter.
				batch_tensor = batch_tensor.write(k,out)
				k = tf.add(k,1)
				return k, q_i, p_i, len_q_i, next_state, batch_tensor

			def match_sentence(i, h_m_ta):
				"""Process batch element `i` of the matching layer.

				Used as the body of the outer tf.while_loop over the batch. Runs the
				inner loop (match_attention) for this element, stacks the per-step
				outputs and writes them to `h_m_ta` at index `i`.

				Args:
					i: index of the batch element to process.
					h_m_ta: TensorArray holding one stacked result per batch element.

				Returns:
					(i + 1, updated h_m_ta).
				"""
				#p_emb_i, h_emb_i = u_q[i], u_p[i]
				p_i = u_p[i]							#q_i :[question_len,input_dim] , p_i:[passage_len,input_dim]
				q_i = u_q[i]
				
				# Lengths are derived from the masks by counting non-zero entries.
				len_q_i, len_p_i = seq_len(question_mask[i]), seq_len(passage_mask[i])
				len_q_i = tf.cast(len_q_i, tf.int32)
				# NOTE(review): len_p_i is computed but not used in the visible code of
				# this function.
				len_p_i = tf.cast(len_p_i, tf.int32)  
				state = lstm_m_cell.zero_state(batch_size=1, dtype=tf.float32)
				batch_tensor = tf.TensorArray(dtype=tf.float32, size=tf.cast(len_q_i, tf.int32))
				# inner loop
				# NOTE(review): the loop runs len_q_i times while match_attention indexes
				# the passage (p_i[k]); presumably it should iterate over passage
				# positions (len_p_i) -- confirm.
				k = tf.constant(0)
				c = lambda a, x, y, z, s, u: tf.less(a, tf.cast(len_q_i, tf.int32))
				b = lambda a,x,y,z,s,u  : match_attention(a,x,y,z,s,u)
				res = tf.while_loop(cond=c, body=b, 
								   loop_vars=(k, q_i, p_i, len_q_i, state, batch_tensor))
				
				# res[-1] is the TensorArray of step outputs; each entry was written with a
				# leading dimension of 1, which is squeezed away after stacking.
				temp = tf.squeeze(res[-1].stack(),axis = 1)
				h_m_ta = h_m_ta.write(i, temp)
				
				i = tf.add(i,1)
				
				
				return i, h_m_ta

			with tf.variable_scope('lstm_matching'):
				h_m_ta = tf.TensorArray(dtype=tf.float32, size=batch_size)
				
				#h_m_ta = np.array([10,15,75])
				c = lambda x ,y: tf.less(x, batch_size)
				b = lambda x ,y: match_sentence(x,y)
				i = tf.constant(0)
				h_m_res = tf.while_loop(cond=c, body=b,
									   loop_vars = (i, h_m_ta))
				
				v_p = h_m_res[-1].stack()

		with tf.name_scope("self-matching"):
			bilstm_cell = tf.contrib.rnn.BasicLSTMCell(hidden_size)
			
			def self_match_attention(t,p_i,len_p_i,state,batch_val):
				"""Run one step of the self-matching loop for a single passage.

				Used as the body of the inner tf.while_loop built in
				self_match_sentence. Row `t` of `p_i` is scored against every row of
				`p_i`; the normalised scores weight a sum of the rows, and the
				concatenation [weighted sum, p_i[t]] is fed to `bilstm_cell`.

				Args:
					t: loop counter; indexes `p_i` and is the write position in `batch_val`.
					p_i: matched passage representation for one batch element.
					len_p_i: number of rows the scores are reshaped to.
					state: LSTM state tuple of `bilstm_cell` from the previous step.
					batch_val: TensorArray collecting the per-step cell outputs.

				Returns:
					The loop variables for the next iteration:
					(t + 1, p_i, len_p_i, next_state, updated batch_val).
				"""
				v_p_t = tf.reshape(p_i[t],[1,-1])
				# Local alias; shadows the outer `v_p` inside this function only.
				v_p = p_i
				with tf.variable_scope("w"):
					w_v_p = tf.get_variable(shape = [hidden_size, hidden_size],
								  name = "w_v_p")
					w_v_p_ = tf.get_variable(shape = [hidden_size, hidden_size],
								  name = "w_v_p_")
					w_v_e = tf.get_variable(shape = [hidden_size,1],
								  name = "w_v_e")

				# NOTE(review): bilstm_state is assigned but never used below.
				bilstm_state = tf.reshape(state.h,[1,-1])
				sum_m = tf.matmul(v_p,w_v_p) 
				# The single-row term broadcasts over all rows of sum_m.
				sum_m += tf.matmul(v_p_t,w_v_p_)
				s_t = tf.matmul(tf.tanh(sum_m),w_v_e)
				# Hand-written softmax over passage positions; the reshape requires p_i
				# to have exactly len_p_i rows.
				exps = tf.reshape(tf.exp(s_t), [len_p_i])

				alphas = exps / tf.reshape(tf.reduce_sum(exps, 0), [1])
				a_t = tf.reduce_sum(p_i* tf.reshape(alphas, [len_p_i, 1]), 0)

				a_t = tf.reshape(a_t, [1,hidden_size])
				m_t = tf.concat([a_t, v_p_t], axis=1)
				with tf.variable_scope('lstm_m_step'):
					out, next_state = bilstm_cell(inputs=m_t, state=state)
				
				# Store this step's output and advance the loop counter.
				batch_val = batch_val.write(t,out)
				t = tf.add(t,1)

				return t,p_i,len_p_i,next_state,batch_val

			def self_match_sentence(i,h):
				"""Process batch element `i` of the self-matching layer.

				Used as the body of the outer tf.while_loop over the batch. Runs the
				inner loop (self_match_attention) over `v_p[i]`, stacks the per-step
				outputs and writes them to `h` at index `i`.

				Args:
					i: index of the batch element to process.
					h: TensorArray holding one stacked result per batch element.

				Returns:
					(i + 1, updated h).
				"""
				p_i = v_p[i]
				len_p_i = tf.cast(seq_len(passage_mask[i]),tf.int32)

				state = bilstm_cell.zero_state(batch_size =1, dtype = tf.float32)
				# NOTE(review): the array is created with size=1 and without
				# dynamic_size, yet the loop below writes indices 0..len_p_i-1 --
				# presumably size should be len_p_i; confirm.
				batch_val = tf.TensorArray(dtype=tf.float32, size=1)

				t = tf.constant(0)
				c = lambda a, x, y, z, s  : tf.less(a, len_p_i)
				b = lambda a, x, y, z, s  : self_match_attention(a, x, y, z, s)

				res = tf.while_loop(cond = c, body =b,
									loop_vars = (t,p_i,len_p_i,state,batch_val))

				# res[-1] is the TensorArray of step outputs; drop the size-1 axis that
				# each written entry carries.
				tem = tf.squeeze(res[-1].stack(),axis=1)
				h = h.write(i,tem)
				i = tf.add(i,1)
				return i,h

			with tf.name_scope("lstm_self_matching"):
				h = tf.TensorArray(dtype=tf.float32, size=batch_size)
				c = lambda x,y : tf.less(x,tf.cast(batch_size, tf.int32))
				b = lambda x,y : self_match_sentence(x,y)
				i = tf.constant(0)
				res = tf.while_loop(cond = c, body = b,
									 loop_vars = (i,h))
				h_p = res[-1].stack()
				print(h_p)
		

		with tf.variable_scope("output_layer"):
			with tf.name_scope("intial_state"):
				with tf.variable_scope("par"):
					w_v_q = tf.get_variable(shape = [hidden_size, hidden_size],name = 'w_v_q')
					w_u_q = tf.get_variable(shape = [hidden_size, hidden_size],name = 'w_u_q')
					V_r_q = tf.get_variable(shape = [15, hidden_size],name = 'V_r_q')				#15 : question_len
					e = tf.get_variable(shape = [hidden_size,1],name = 'e')
				shape_u_q = tf.shape(u_q)
				sum_m = tf.reshape(tf.matmul(tf.reshape(u_q,[-1,hidden_size]),w_u_q),shape_u_q)
				sum_m += tf.matmul(V_r_q,w_v_q)
				s = tf.matmul(tf.reshape(tf.tanh(sum_m),[-1,hidden_size]),e)  # [bs*len,1]
				exps = tf.reshape(tf.exp(s), [-1, question_len])
				alphas = exps / tf.reshape(tf.reduce_sum(exps, 1), [-1, 1])
				initial_s = tf.reduce_sum(u_q * tf.reshape(alphas, [-1, question_len, 1]), 1)  #[batch_size,hidden_size]
				c_ = tf.zeros(shape = tf.shape(initial_s), dtype = tf.float32)
				

			with tf.name_scope("answer_recurrent_network"):
				answer_lstm = tf.contrib.rnn.BasicLSTMCell(hidden_size)
				
				predictions = []
				shape_h_p = tf.shape(h_p)
				with tf.variable_scope('wi'):
					w_h_p = tf.get_variable(shape = [hidden_size,hidden_size], name = "w_h_p")
					w_h_a = tf.get_variable(shape = [hidden_size,hidden_size], name = "w_h_a")
					w_h_e = tf.get_variable(shape = [hidden_size,1], name = "w_h_e")
				for i in range(2):
					if(i==0):
						sum_m = tf.reshape(tf.reshape(tf.matmul(tf.reshape(h_p,[-1,hidden_size]),w_h_p),shape_h_p) + tf.matmul(initial_s,w_h_a),[-1,hidden_size])
						s = tf.matmul(tf.tanh(sum_m),e)
						exps = tf.reshape(tf.exp(s), [-1, passage_len])
						alphas = exps / tf.reshape(tf.reduce_sum(exps, 1), [-1, 1])
						predictions.append(alphas)
						alphas = tf.reshape(alphas, [-1,passage_len,1])
						#a_k = tf.reduce_sum(q_i* tf.reshape(alphas, [len_q_i, 1]), 0)
						input_a = tf.reduce_sum(h_p*alphas, 1)
						
						initial_s = tf.tuple([initial_s,initial_s])
					else:
						sum_m = tf.reshape(tf.reshape(tf.matmul(tf.reshape(h_p,[-1,hidden_size]),w_h_p),shape_h_p) + tf.matmul(initial_s.h,w_h_a),[-1,hidden_size])
						s = tf.matmul(tf.tanh(sum_m),e)
						exps = tf.reshape(tf.exp(s), [-1, passage_len])
						alphas = exps / tf.reshape(tf.reduce_sum(exps, 1), [-1, 1])
						predictions.append(alphas)
						alphas = tf.reshape(alphas, [-1,passage_len,1])
						#a_k = tf.reduce_sum(q_i* tf.reshape(alphas, [len_q_i, 1]), 0)
						input_a = tf.reduce_sum(h_p*alphas, 1)
					
					_, initial_s = answer_lstm.call(input_a ,initial_s)
					

		with tf.name_scope("loss"):
			pred_start = predictions[0]   # [batch_size, passage_len]
			pred_end = predictions[1]     #  [batch_size , passage_len]

			def calc_loss(pred, ind):
				"""Sum the per-position log-likelihood of the target indices.

				For every row of `pred` the target position contributes log(p) and
				every other position contributes log(1 - p); all contributions are
				summed into one scalar.

				The previous implementation iterated over `pred` with Python loops and
				called float() on its elements, which does not work on symbolic
				tensors, and compared each position with the whole `ind` tensor
				instead of the index belonging to that row. This version builds the
				same sum with graph ops.

				Args:
					pred: probabilities, assumed [batch_size, passage_len] -- TODO confirm.
					ind: integer target position per row, assumed [batch_size] -- TODO confirm.

				Returns:
					Scalar tensor holding the summed log-likelihood.

				NOTE(review): the sign of the original is kept, so the value is a
				log-likelihood (<= 0). If it is passed to a minimiser it presumably
				needs to be negated -- confirm against the training code.
				"""
				num_positions = tf.shape(pred)[-1]
				# Boolean mask that is True exactly at each row's target position.
				is_target = tf.one_hot(ind, depth=num_positions, on_value=True, off_value=False)
				# Pick p at the target and (1 - p) elsewhere before taking the log, so
				# no 0 * log(0) products can appear.
				likelihood = tf.where(is_target, pred, 1.0 - pred)
				return tf.reduce_sum(tf.log(likelihood))

			self.loss = calc_loss(pred_start, self.start_index) + calc_loss(pred_end, self.stop_index)

		with tf.name_scope("accuracy"):

			correct_start = tf.equal(tf.argmax(pred_start, 1), self.start_index)
			self.accuracy_start = tf.reduce_mean(tf.cast(correct_start, 'float'))

			correct_stop = tf.equal(tf.argmax(pred_stop, 1), self.stop_index)
			self.accuracy_stop = tf.reduce_mean(tf.cast(correct_stop, 'float'))
Example #17
0
 def loop_body(i, *args):
   """Run one optimisation step on `tiled_z` and return the next loop state.

   The extra positional arguments are accepted but ignored. The returned
   tensors are gated on the optimiser update via tf.tuple's control inputs.
   """
   i += 1
   generated = g.callable_generator(tiled_z, False)
   squared_error = tf.square(generated - tiled_image_batch)
   # Mean squared error per image, reduced over axes 1, 2 and 3.
   per_image_loss = tf.reduce_mean(squared_error, axis=[1, 2, 3])
   summed_loss = tf.reduce_sum(per_image_loss)
   update_op = optimizer.minimize(summed_loss, var_list=[tiled_z])
   next_state = [i, tiled_z, per_image_loss]
   return tf.tuple(next_state, control_inputs=[update_op])
Example #18
0
    def _run_network_test(self, network_fun, inputs, inf_type=spn.InferenceType.MARGINAL,
                          log=False, on_gpu=True):
        """Run a single test for a single op.

        Builds `self.num_networks` networks with `network_fun` on the chosen
        device, runs the last one `self.num_runs` times while timing each
        run, compares every output with the reference from
        `self._true_output`, and optionally writes a Chrome trace.

        Args:
            network_fun: callable that builds a network; its `__name__` is
                used as the op name in logs and file names.
            inputs: array fed to the indicator leaf on every run.
            inf_type: inference type passed to `network_fun`; also selects
                the MPE/MARGINAL label.
            log: passed to `network_fun`; when true the fetched output is
                exponentiated before the comparison.
            on_gpu: place the graph on '/gpu:0' when true, else '/cpu:0'.

        Returns:
            An `OpTestResult` with the op name, device flag, SPN and TF graph
            sizes, setup and weight-initialisation times, the list of run
            times and whether every run matched the reference output.

        Side effects:
            Resets the default graph, sets `self.test_failed` on a mismatch
            and, when `self.profile` is set, writes a timeline JSON file
            into `self.profiles_dir`.
        """
        # Preparations
        op_name = network_fun.__name__
        device_name = '/gpu:0' if on_gpu else '/cpu:0'

        # Print
        print2("--> %s: on_gpu=%s, inputs_shape=%s, inference=%s, log=%s"
               % (op_name, on_gpu, inputs.shape, ("MPE" if inf_type ==
                  spn.InferenceType.MPE else "MARGINAL"), log), self.file)

        # Compute true output
        true_out = self._true_output(network_fun, inputs, self.num_input_vals,
                                     self.num_mixtures, self.num_subsets, inf_type)

        # Create graph
        tf.reset_default_graph()
        with tf.device(device_name):
            # Create input
            inputs_pl = spn.IndicatorLeaf(num_vars=self.num_input_vars,
                                num_vals=self.num_input_vals, name="iv_x")
            # Create networks, stacking one on top of the other, although each
            # network remains unconnected and independent of each other.
            # setup_time covers the construction of all networks.
            start_time = time.time()
            root, init_network, network = \
                network_fun(inputs_pl, self.num_input_vals, self.num_mixtures,
                            self.num_subsets, inf_type, log)
            for _ in range(self.num_networks - 1):
                # The tuple ensures that the next network waits for the output
                # of the previous network, effectively stacking the networks
                # but using the original input every time
                root, init_network, network = \
                    network_fun(inputs_pl, self.num_input_vals, self.num_mixtures,
                                self.num_subsets, inf_type, log, tf.tuple([network])[0])
            setup_time = time.time() - start_time
        # Get num of SPN ops
        # Node count of the last root multiplied by the number of networks.
        spn_size = root.get_num_nodes() * self.num_networks
        # Get num of graph ops
        tf_size = len(tf.get_default_graph().get_operations())
        # Run op multiple times
        output_correct = True
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=False,
                log_device_placement=self.log_devs)) as sess:
            # Initialize weights of all the sum node types in the graph
            start_time = time.time()
            init_network.run()
            weights_init_time = time.time() - start_time

            run_times = []
            # Create feed dictionary
            feed = {inputs_pl: inputs}
            for n in range(self.num_runs):
                # Run
                start_time = time.time()
                out = sess.run(network, feed_dict=feed)
                run_times.append(time.time() - start_time)
                # Test value
                # A mismatch is recorded rather than raised so the remaining
                # runs are still timed.
                try:
                    np.testing.assert_array_almost_equal((np.exp(out) if log else
                                                          out), true_out)
                except AssertionError:
                    output_correct = False
                    self.test_failed = True

            if self.profile:
                # Add additional options to trace the session execution
                options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()

                # Extra traced run; it is not added to run_times.
                out = sess.run(network, feed_dict=feed, options=options,
                               run_metadata=run_metadata)

                # Create the Timeline object, and write it to a json file
                fetched_timeline = timeline.Timeline(run_metadata.step_stats)
                chrome_trace = fetched_timeline.generate_chrome_trace_format()
                if not os.path.exists(self.profiles_dir):
                    os.makedirs(self.profiles_dir)

                # File name encodes op, device and inference mode,
                # e.g. <op>_GPU_MARGINAL-LOG.
                file_name = op_name
                file_name += ("_GPU" if on_gpu else "_CPU")
                file_name += ("_MPE-LOG" if log else "_MPE") if inf_type == \
                    spn.InferenceType.MPE else ("_MARGINAL-LOG" if log else
                                                "_MARGINAL")

                with open('%s/timeline_value_%s.json' % (self.profiles_dir,
                          file_name), 'w') as f:
                    f.write(chrome_trace)

        # Return stats
        return OpTestResult(op_name, on_gpu, spn_size, tf_size, setup_time,
                            weights_init_time, run_times, output_correct)
Example #19
0
def create_gradient_clipping(loss,optm,vars,clipVal=1.0):
    """Return `loss` gated on a training step that uses value-clipped gradients.

    Gradients of `loss` with respect to `vars` are computed with `optm`, each
    non-None gradient is clipped element-wise to [-clipVal, clipVal], and the
    clipped gradients are applied. Evaluating the returned tensor triggers
    that update through tf.tuple's control inputs.
    """
    raw_grads, variables = zip(*optm.compute_gradients(loss, var_list=vars))

    clipped_grads = []
    for gradient in raw_grads:
        if gradient is not None:
            gradient = tf.clip_by_value(gradient, -clipVal, clipVal)
        clipped_grads.append(gradient)

    apply_op = optm.apply_gradients(zip(clipped_grads, variables))
    gated = tf.tuple([loss], control_inputs=[apply_op])
    return gated[0]
Example #20
0
        def loop_fn(time, cell_output, cell_state, loop_state):
            """
            Loop function that allows to control input to the rnn cell and manipulate cell outputs.
            :param time: current time step
            :param cell_output: output from previous time step or None if time == 0
            :param cell_state: cell state from previous time step
            :param loop_state: custom loop state to share information between different iterations of
            this loop fn

            :return: tuple consisting of
              finished: scalar that is True once every sequence has reached its end
              (the per-element flags are combined with reduce_all)
              next_input: input to next time step
              next_cell_state: cell state forwarded to next time step
              emit_output: The first return argument of raw_rnn. This is not necessarily the output of
              the RNN cell, but could e.g. be the output
              of a dense layer attached to the rnn layer.
              next_loop_state: loop state forwarded to the next time step

            NOTE(review): in the code as it stands, `next_input` is never assigned in
            this function and the `else` branch assigns none of the returned locals
            (the relevant statements are commented out), so the return statement
            cannot succeed as written -- confirm against the full source.
            """

            # `max_time` comes from the enclosing scope.
            elements_finished = (time >= max_time)
            finished = tf.reduce_all(elements_finished)

            if cell_output is None:
                '''
                time == 0, used for initialization before first call to cell
                This is just to defined the desired shape of the tensors
                '''
                next_cell_state = cell.zero_state(batch_size, tf.float32)
                '''
                the emit_output in this case tells TF how future emits look
                For the first call to loop_fn the emit_output corresponds to the emit_structure which is
                then used to determine the size of the zero_tensor for the emit_ta (defaults to cell.output_size). 
                '''
                # Three zero vectors of length output_dim describe the emit structure.
                emit_output = tf.tuple([
                    tf.zeros([output_dim]),
                    tf.zeros([output_dim]),
                    tf.zeros([output_dim])
                ])
                # tf.zeros([config.batch_size, output_dim], dtype=tf.float32)  # tf.zeros([output_dim])
                next_loop_state = output_ta
                '''
                this is the initial step, i.e. there is no output from a previous time step, what we feed here
                can highly depend on the data. In this case we just assign the actual input in the first time step.
                '''
                init_z = tf.zeros((batch_size, output_dim), dtype=tf.float32)
                #init_z = tf.random_normal((config.batch_size, output_dim), 0, 1, dtype=tf.float32)
                # First input: the (dropout-regularised) input at `time` concatenated
                # with a zero vector.
                x_time = tf.layers.dropout(inputs_ta.read(time), rate=rate_x)
                next_in = tf.concat([x_time, init_z], 1)
            else:
                '''
                t > 0, called right after call to cell, i.e. cell_output is the output from time t-1.
                here you can do whatever ou want with cell_output before assigning it to emit_output.
                In this case, we don't do anything
                pass the last state to the next
                '''

                # NOTE(review): everything below is commented out, so this branch
                # leaves next_cell_state, emit_output and next_loop_state unassigned.

                # next_cell_state = cell_state

                # emit_output =  tf.tuple([mean, var, current_z])

                # next_in = tf.cond(finished,lambda: tf.zeros([batch_size, rnn_input_dim], dtype=tf.float32),

                # next_loop_state  = loop_state.write(time - 1,tf.concat([cell_state[0], cell_state[1]],1))

            # NOTE(review): the assignment of next_input (from next_in) is commented out.
            # next_input = tf.cond(finished, lambda: tf.zeros([batch_size, rnn_input_dim], dtype=tf.float32), lambda: next_in)
            # next_input.set_shape([None, rnn_input_dim])

            return (finished, next_input, next_cell_state, emit_output,
                    next_loop_state)
Example #21
0
def train(dataset, initial_ckpt, learning_rate, logs_path, max_training_iters, save_step, display_step,
           global_step, iter_mean_grad=1, batch_size=1, momentum=0.9, resume_training=False, config=None, finetune=1):
    """Build the training graph for `det_lesion_resnet` and run the training loop.

    Gradients are accumulated over `iter_mean_grad` mini-batches with one
    `tf.ConditionalAccumulator` per variable that receives a gradient, and the
    averaged gradients are then applied in a single Momentum step.

    Args:
    dataset: Reference to a Dataset object instance
    initial_ckpt: Path to the checkpoint to initialize the network (May be parent network or pre-trained Imagenet)
    learning_rate: Value for the learning rate. It can be number or an instance to a learning rate object.
    logs_path: Path to store the checkpoints
    max_training_iters: Number of training iterations
    save_step: A checkpoint will be created every save_steps
    display_step: Information of the training will be displayed every display_steps
    global_step: Reference to a Variable that keeps track of the training steps
    iter_mean_grad: Number of gradient computations that are average before updating the weights
    batch_size: Number of images per mini-batch; fixes the first dimension of the input placeholders
    momentum: Value of the momentum parameter for the Momentum optimizer
    resume_training: Boolean to try to restore from a previous checkpoint (True) or not (False)
    config: Reference to a Configuration object used in the creation of a Session
    finetune: Use to select type of training, 0 for the parent network and 1 for finetunning
    Returns:
    None. Checkpoints and TensorBoard summaries are written under `logs_path`.
    """
    model_name = os.path.join(logs_path, "det_lesion.ckpt")
    if config is None:
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True

    tf.logging.set_verbosity(tf.logging.INFO)

    # Prepare the input data
    input_image = tf.placeholder(tf.float32, [batch_size, 80, 80, 3])
    input_label = tf.placeholder(tf.float32, [batch_size])
    is_training = tf.placeholder(tf.bool, shape=())

    tf.summary.histogram('input_label', input_label)

    # Create the network
    with slim.arg_scope(det_lesion_arg_scope()):
        net, end_points = det_lesion_resnet(input_image, is_training_option=is_training)

    # Initialize weights from pre-trained model
    if finetune == 0:
        init_weights = load_resnet_imagenet(initial_ckpt)

    # Define loss
    with tf.name_scope('losses'):
        loss, output, target = binary_cross_entropy(net, input_label)
        total_loss = loss + tf.add_n(tf.losses.get_regularization_losses())
        tf.summary.scalar('losses/total_loss', total_loss)
        tf.summary.histogram('losses/output', output)
        tf.summary.histogram('losses/target', target)

    # Define optimization method
    with tf.name_scope('optimization'):
        tf.summary.scalar('learning_rate', learning_rate)
        optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)
        #optimizer = tf.train.AdamOptimizer(learning_rate)
        grads_and_vars = optimizer.compute_gradients(total_loss)
        # Keep only the variables that actually receive a gradient. Every list
        # below is built from these pairs, so accumulator k always belongs to
        # pair k. (Previously the accumulator list skipped None gradients but
        # was indexed with positions of the unfiltered list.)
        trainable_pairs = [(grad, var) for grad, var in grads_and_vars if grad is not None]
        with tf.name_scope('grad_accumulator'):
            # Each accumulator uses the dtype of its own gradient.
            grad_accumulator = [tf.ConditionalAccumulator(grad.dtype) for grad, _ in trainable_pairs]
        with tf.name_scope('apply_gradient'):
            grad_accumulator_ops = []
            for accumulator, (var_grad, var) in zip(grad_accumulator, trainable_pairs):
                var_name = str(var.name).split(':')[0]

                if "weights" in var_name:
                    aux_layer_lr = 1.0
                elif "biases" in var_name:
                    aux_layer_lr = 2.0
                else:
                    # Any other variable previously reused the multiplier of the
                    # preceding variable (or failed on the first one); use a
                    # neutral multiplier instead.
                    aux_layer_lr = 1.0

                grad_accumulator_ops.append(accumulator.apply_grad(var_grad * aux_layer_lr,
                                                                   local_step=global_step))
        with tf.name_scope('take_gradients'):
            mean_grads_and_vars = []
            for accumulator, (_, var) in zip(grad_accumulator, trainable_pairs):
                mean_grads_and_vars.append((accumulator.take_grad(iter_mean_grad), var))
            apply_gradient_op = optimizer.apply_gradients(mean_grads_and_vars, global_step=global_step)

    with tf.name_scope('metrics'):
        acc_op = my_accuracy(net, input_label)
        tf.summary.scalar('metrics/accuracy', acc_op)

    # Make fetching total_loss also run the collected update ops.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    if update_ops:
        tf.logging.info('Gathering update_ops')
        with tf.control_dependencies(tf.tuple(update_ops)):
            total_loss = tf.identity(total_loss)

    merged_summary_op = tf.summary.merge_all()

    # Initialize variables
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        print('Init variable')
        sess.run(init)

        # op to write logs to Tensorboard
        logs_path_train = os.path.join(logs_path,'train')
        logs_path_test = os.path.join(logs_path,'test')
        summary_writer = tf.summary.FileWriter(logs_path_train, graph=tf.get_default_graph())
        test_writer = tf.summary.FileWriter(logs_path_test)

        # Create saver to manage checkpoints
        saver = tf.train.Saver(max_to_keep=None)

        last_ckpt_path = tf.train.latest_checkpoint(logs_path)
        if last_ckpt_path is not None and resume_training:
            # Load last checkpoint
            print('Initializing from previous checkpoint...')
            saver.restore(sess, last_ckpt_path)
            step = global_step.eval() + 1
        else:
            # Load pre-trained model
            if finetune == 0:
                print('Initializing from pre-trained imagenet model...')
                init_weights(sess)
            else:
                print('Initializing from pre-trained model...')
                # Restore only variables whose last name component contains
                # 'weights' or 'bias'.
                var_list = []
                for var in tf.global_variables():
                    var_type = var.name.split('/')[-1]
                    if 'weights' in var_type or 'bias' in var_type:
                        var_list.append(var)
                saver_res = tf.train.Saver(var_list=var_list)
                saver_res.restore(sess, initial_ckpt)
            step = 1
        sess.run(interp_surgery(tf.global_variables()))
        print('Weights initialized')

        print('Start training')
        while step < max_training_iters + 1:
            # Average the gradient
            for iter_steps in range(0, iter_mean_grad):
                batch_image, batch_label, x_bb_train, y_bb_train, ids_train = dataset.next_batch(batch_size, 'train', 0.5)
                batch_image_val, batch_label_val, x_bb_val, y_bb_val, ids_val = dataset.next_batch(batch_size, 'val', 0.5)
                image = preprocess_img(batch_image, x_bb_train, y_bb_train, ids_train)
                label = batch_label
                val_image = preprocess_img(batch_image_val, x_bb_val, y_bb_val)
                label_val = batch_label_val
                # Fetching the accumulator ops adds this batch's gradients.
                run_res = sess.run([total_loss, merged_summary_op, acc_op] + grad_accumulator_ops,
                                   feed_dict={input_image: image, input_label: label, is_training: True})
                batch_loss = run_res[0]
                summary = run_res[1]
                acc = run_res[2]
                if step % display_step == 0:
                    val_run_res = sess.run([total_loss, merged_summary_op, acc_op],
                                           feed_dict={input_image: val_image, input_label: label_val, is_training: False})
                    val_batch_loss = val_run_res[0]
                    val_summary = val_run_res[1]
                    val_acc = val_run_res[2]

            # Apply the gradients
            sess.run(apply_gradient_op)

            # Save summary reports
            summary_writer.add_summary(summary, step)
            if step % display_step == 0:
                test_writer.add_summary(val_summary, step)

            # Display training status
            # `file=sys.stderr` belongs to print(); it used to be passed to
            # str.format(), where it was silently ignored.
            if step % display_step == 0:
                print("{} Iter {}: Training Loss = {:.4f}".format(datetime.now(), step, batch_loss), file=sys.stderr)
                print("{} Iter {}: Validation Loss = {:.4f}".format(datetime.now(), step, val_batch_loss), file=sys.stderr)
                print("{} Iter {}: Training Accuracy = {:.4f}".format(datetime.now(), step, acc), file=sys.stderr)
                print("{} Iter {}: Validation Accuracy = {:.4f}".format(datetime.now(), step, val_acc), file=sys.stderr)

            # Save a checkpoint
            if step % save_step == 0:
                save_path = saver.save(sess, model_name, global_step=global_step)
                print("Model saved in file: %s" % (save_path))

            step += 1

        # Save the final state unless the last iteration already did.
        if (step-1) % save_step != 0:
            save_path = saver.save(sess, model_name, global_step=global_step)
            print("Model saved in file: %s" % (save_path))

        print('Finished training.')
Example #22
0
  def _compute_gradients(self, cost):
    """Computes gradients.

    When `config.manual_gradients` is off this defers to the parent class.
    Otherwise the gradient graph is built by hand: first for the final
    batch-norm/logit layers, then unit by unit from the last residual unit to
    the first, and finally for the initial convolution and batch norm.

    Args:
      cost: Loss function. In the manual path it is only forwarded to the
        parent when manual gradients are disabled; the cost is rebuilt locally
        from the saved hidden activations.
    Returns:
      grads_and_vars: List of tuple of gradients and variables.
        NOTE(review): the value is the result of `map(...)`, which is a lazy
        iterator rather than a list on Python 3 -- confirm the target version.
    """
    config = self.config
    if not config.manual_gradients:
      return super(RevNetModel, self)._compute_gradients(cost)
    log.warning("Manually building gradient graph.")
    # NOTE(review): `g` is not used again in this method (add_wd below rebinds
    # the name locally).
    g = tf.get_default_graph()
    # All get_variable calls below fetch existing variables.
    tf.get_variable_scope().reuse_variables()
    num_stages = len(self.config.num_residual_units)
    
    beta_final = tf.get_variable("unit_last/final_bn/beta")
    gamma_final = tf.get_variable("unit_last/final_bn/gamma")
   
    w_final = tf.get_variable("logit/w")
    b_final = tf.get_variable("logit/b")
    filters = [ff for ff in self.config.filters]  # Copy filter config.

    if config.use_bottleneck:
      res_func = self._bottleneck_residual_backward
      # For CIFAR-10 it's [16, 16, 32, 64] => [16, 64, 128, 256]
      for ii in range(1, len(filters)):
        filters[ii] *= 4
    else:
      res_func = self._residual_backward

    grads_list = []
    vars_list = []
    var_final = [beta_final, gamma_final, w_final, b_final]
   
    # Rebuild the head of the network on top of the last saved activations,
    # cut off from the rest of the graph with stop_gradient.
    h1, h2 = self._saved_hidden[-1]
    h1, h2 = tf.stop_gradient(h1), tf.stop_gradient(h2)
    h = _concat([h1, h2], axis=3)
    with tf.variable_scope("unit_last"):
      h = self._batch_norm("final_bn", h, add_ops=False)
      h = self._relu("final_relu", h)

    h = self._global_avg_pool(h)
    with tf.variable_scope("logit"):
      logits = self._fully_connected(h, config.num_classes)
    with tf.variable_scope("costs"):
      xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits=logits, labels=self.label)
      cost = tf.reduce_mean(xent, name="xent")

    # First two entries are the gradients w.r.t. h1 and h2; the rest belong to
    # the variables in var_final.
    _grads = tf.gradients(cost, [h1, h2] + var_final, gate_gradients=True)
    dh1, dh2 = _grads[0], _grads[1]
    _grads = _grads[2:]
    # Injected dependency.
    with tf.control_dependencies(_grads):
      h_grad = (tf.identity(dh1), tf.identity(dh2))
    grads_list.extend(_grads)
    # grads_list.extend(_grads[2:])
    vars_list.extend(var_final)

    h1, h2 = self._saved_hidden[-1]
    h1, h2 = tf.stop_gradient(h1), tf.stop_gradient(h2)
    h = (h1, h2)

    # New version, using single for-loop.
    # ss is the current stage index and ii the unit index within that stage;
    # both count downwards while ll walks all layers from last to first.
    ss = num_stages - 1
    ii = config.num_residual_units[ss] - 1
    nlayers = sum(config.num_residual_units)
    for ll in range(nlayers - 1, -1, -1):
      no_activation = False
      if ii == 0:
        # First unit of a stage: uses the previous stage's filter count and
        # the stage's configured stride.
        in_filter = filters[ss]
        stride = self._stride_arr(self.config.strides[ss])
        if ss == 0:
          no_activation = True
      else:
        in_filter = filters[ss + 1]
        stride = self._stride_arr(1)
      out_filter = filters[ss + 1]

      with tf.variable_scope("unit_{}_{}".format(ss + 1, ii)):

        # Reconstruct input.
        if ii == 0:
          h = self._saved_hidden[ss]
        else:
          h = res_func(h, out_filter)

        # Rerun the layer, and get gradients.
        h_grad, w_list, w_grad = self._residual_grad(
            h,
            h_grad,
            in_filter,
            out_filter,
            stride,
            no_activation=no_activation)

        grads_list.extend(w_grad)
        vars_list.extend(w_list)

      # Counter.
      if ii == 0:
        ss -= 1
        ii = config.num_residual_units[ss] - 1
      else:
        ii -= 1

    # Back-propagate the remaining activation gradient into the initial layers.
    h_grad = _concat(h_grad, axis=3)
    w_init = tf.get_variable("init/init_conv/w")
 
    beta_init = tf.get_variable("init/init_bn/beta")
    gamma_init = tf.get_variable("init/init_bn/gamma")
    var_init = [beta_init, gamma_init, w_init]
    _grads = tf.gradients(h, var_init, h_grad)
    grads_list.extend(_grads)
    vars_list.extend(var_init)

    # Add weight decay.
    def add_wd(x):
      # x is a (gradient, variable) pair; decay is added only for variables
      # whose name ends in "w:0".
      g, w = x[0], x[1]
      assert self._wd_hidden > 0.0, "Not applying weight decay"
      if w.name.endswith("w:0") and self._wd_hidden > 0.0:
        log.info("Adding weight decay {:.4e} for variable {}".format(
            self._wd_hidden, x[1].name))
        return g + self._wd_hidden * w, w
      else:
        return g, w

    # Always gate gradients to avoid unwanted behaviour.
    return map(add_wd, zip(tf.tuple(grads_list), vars_list))
Example #23
0
    def __init__(self, *args, **kwargs):
        """Build the CPU-side input pipeline and expose its output tensors.

        Serialized records are read with `RecordInput`, parsed one by one with
        `parse_example_proto_and_process`, stacked into a batch and passed
        through a `StagingArea`. The staged tensors are stored on
        `self.images` (and `self.labels` when `self.with_labels` is set),
        the staging put op on `self.image_producer_op`, and the per-image
        shape on `self.shape` for the 'NCHW' and 'NHWC' formats.
        """
        super(DataFlow, self).__init__(*args, **kwargs)
        self.pattern = 'tf_records_train/train*'

        # Preprocessing is pinned to the CPU.
        with tf.device('/cpu:0'):
            record_input = RecordInput(
                file_pattern=os.path.join(self.data_dir, self.pattern),
                seed=Record_seed,
                parallelism=32,
                buffer_size=4000,
                batch_size=self.batch_size,
                shift_ratio=0,
                name='record_input')
            record_batch = record_input.get_yield_op()
            # One scalar string tensor per example.
            split_records = tf.split(record_batch, self.batch_size, 0)
            scalar_records = [tf.reshape(piece, []) for piece in split_records]

            images = []
            labels = []
            for idx in xrange(self.batch_size):
                serialized = scalar_records[idx]
                if self.with_labels:
                    image, label = self.parse_example_proto_and_process(serialized)
                    labels.append(label)
                else:
                    image = self.parse_example_proto_and_process(serialized)
                images.append(image)

            if self.with_labels:
                stacked_labels = tf.parallel_stack(labels, 0)
                labels = tf.reshape(stacked_labels, [self.batch_size])

            stacked_images = tf.parallel_stack(images)
            batch_shape = [
                self.batch_size, self.output_size,
                self.output_size, self.c_dim
            ]
            images = tf.reshape(stacked_images, shape=batch_shape)

            if self.format == 'NCHW':
                images = tf.transpose(images, [0, 3, 1, 2])
            images_shape = images.get_shape()

            # Stage the batch; the tensors read back are gated on the put op.
            if self.with_labels:
                labels_shape = labels.get_shape()
                image_producer_stage = StagingArea(
                    dtypes=[tf.float32, tf.int32],
                    shapes=[images_shape, labels_shape])
                image_producer_op = image_producer_stage.put([images, labels])
                staged = image_producer_stage.get()
                gated = tf.tuple([staged[0], staged[1]],
                                 control_inputs=[image_producer_op])
                images = gated[0]
                labels = gated[1]
            else:
                image_producer_stage = StagingArea(dtypes=[tf.float32],
                                                   shapes=[images_shape])
                image_producer_op = image_producer_stage.put([images])
                staged_images = image_producer_stage.get()[0]
                gated = tf.tuple([staged_images],
                                 control_inputs=[image_producer_op])
                images = gated[0]

        self.images = images
        self.image_producer_op = image_producer_op
        # Per-image shape; left unset for any other format value.
        if self.format == 'NCHW':
            self.shape = [self.c_dim, self.output_size, self.output_size]
        elif self.format == 'NHWC':
            self.shape = [self.output_size, self.output_size, self.c_dim]
        if self.with_labels:
            self.labels = labels
Example #24
0
    def _run_op_test(self,
                     op_fun,
                     inputs,
                     indices=None,
                     latent_indicators=None,
                     inf_type=spn.InferenceType.MARGINAL,
                     log=False,
                     on_gpu=True):
        """Run a single test for a single op.

        Builds a fresh graph containing ``self.num_ops`` chained instances of
        the op produced by ``op_fun``, runs it ``self.num_runs`` times while
        timing each run, compares the first fetched output with the expected
        value from ``self._true_output`` and, if ``self.profile`` is set,
        writes a Chrome trace of one extra run to ``self.profiles_dir``.

        Args:
            op_fun: One of the ``Ops`` builders. It is called as
                ``op_fun(inputs_pl, indices, latent_indicators_pl,
                self.num_sums, inf_type, log[, previous_output])`` and must
                return ``(init_ops, ops)``.
            inputs: Array fed to the input leaf; ``inputs.shape[1]`` is used
                as the number of input variables.
            indices: Optional indices forwarded to ``op_fun``.
            latent_indicators: Optional values fed to every indicator leaf.
            inf_type: ``spn.InferenceType`` forwarded to ``op_fun``.
            log: Forwarded to ``op_fun``; also used in the profile file name.
            on_gpu: Place the graph on ``/gpu:0`` if true, else ``/cpu:0``.

        Returns:
            An ``OpTestResult`` with the op name, device flag, graph size,
            "Yes"/"No" flags for indices and latent indicators, the graph
            setup time, the list of per-run times and the correctness flag.

        Raises:
            ValueError: If latent indicators are given for an ``op_fun`` that
                is not ``Ops.sum``, ``Ops.par_sums`` or ``Ops.sums``.
        """
        # Preparations
        op_name = op_fun.__name__
        device_name = '/gpu:0' if on_gpu else '/cpu:0'
        indices_flag = "No" if indices is None else "Yes"
        latent_indicators_flag = "No" if latent_indicators is None else "Yes"

        # Print
        print2(
            "--> %s: on_gpu=%s, inputs_shape=%s, indices=%s, latent_indicators=%s, inference=%s, log=%s"
            % (op_name, on_gpu, inputs.shape,
               indices_flag,
               latent_indicators_flag,
               ("MPE" if inf_type == spn.InferenceType.MPE else "MARGINAL"),
               log), self.file)

        input_size = inputs.shape[1]

        # Compute true output
        true_out = self._true_output(op_fun, inputs, indices,
                                     latent_indicators)

        # Create graph
        tf.reset_default_graph()
        with tf.device(device_name):
            # Create input
            inputs_pl = spn.RawLeaf(num_vars=input_size)
            # Create IndicatorLeaf
            if latent_indicators is None:
                latent_indicators_pl = [None for _ in range(self.num_sums)]
            elif op_fun is Ops.sum:
                # One single-variable indicator leaf per sum node
                latent_indicators_pl = [
                    spn.IndicatorLeaf(num_vars=1, num_vals=input_size)
                    for _ in range(self.num_sums)
                ]
            elif op_fun is Ops.par_sums or op_fun is Ops.sums:
                # One indicator leaf with a variable per sum node.
                # (The previous condition `... or Ops.sums` was always true.)
                latent_indicators_pl = [
                    spn.IndicatorLeaf(num_vars=self.num_sums,
                                      num_vals=input_size)
                ]
            else:
                raise ValueError(
                    "Unsupported op for latent indicators: %s" % op_name)
            # Create ops
            start_time = time.time()
            init_ops, ops = op_fun(inputs_pl, indices, latent_indicators_pl,
                                   self.num_sums, inf_type, log)
            for _ in range(self.num_ops - 1):
                # The tuple ensures that the next op waits for the output
                # of the previous op, effectively stacking the ops
                # but using the original input every time
                init_ops, ops = op_fun(inputs_pl, indices,
                                       latent_indicators_pl, self.num_sums,
                                       inf_type, log,
                                       tf.tuple([ops[-1]])[0])
            setup_time = time.time() - start_time
        # Get num of graph ops
        graph_size = len(tf.get_default_graph().get_operations())
        # Run op multiple times
        output_correct = True
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=False,
                log_device_placement=self.log_devs)) as sess:
            # Initialize weights of all the sum nodes in the graph
            init_ops.run()

            run_times = []
            # Create feed dictionary
            feed = {inputs_pl: inputs}
            if latent_indicators is not None:
                for iv_pl in latent_indicators_pl:
                    feed[iv_pl] = latent_indicators

            for _ in range(self.num_runs):
                # Run
                start_time = time.time()
                out = sess.run(ops, feed_dict=feed)
                run_times.append(time.time() - start_time)
                # Test value; a mismatch is recorded, not raised, so that the
                # remaining runs are still timed
                try:
                    np.testing.assert_array_almost_equal(out[0], true_out)
                except AssertionError:
                    output_correct = False
                    self.test_failed = True

            if self.profile:
                # Add additional options to trace the session execution
                options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()

                out = sess.run(ops,
                               feed_dict=feed,
                               options=options,
                               run_metadata=run_metadata)

                # Create the Timeline object, and write it to a json file
                fetched_timeline = timeline.Timeline(run_metadata.step_stats)
                chrome_trace = fetched_timeline.generate_chrome_trace_format()
                if not os.path.exists(self.profiles_dir):
                    os.makedirs(self.profiles_dir)

                file_name = op_name
                file_name += ("_GPU" if on_gpu else "_CPU")
                if inf_type == spn.InferenceType.MPE:
                    file_name += "_MPE-LOG" if log else "_MPE"
                else:
                    file_name += "_MARGINAL-LOG" if log else "_MARGINAL"
                if indices is not None:
                    file_name += "_Indices"
                if latent_indicators is not None:
                    file_name += "_IVS"

                with open(
                        '%s/timeline_path_%s.json' %
                        (self.profiles_dir, file_name), 'w') as f:
                    f.write(chrome_trace)

        # Return stats
        return OpTestResult(op_name, on_gpu, graph_size,
                            indices_flag,
                            latent_indicators_flag,
                            setup_time, run_times, output_correct)
Example #25
0
    def buildModel(self, inputShape):
        """Build the multi-layer sparse-coding graph and its summaries.

        For each of ``self.numLayers`` layers this creates a dictionary
        variable (``V1_W``), activation variables (``V1_A``, ``V1_Y`` and their
        ``old*`` copies), a reconstruction of the layer input from the
        activations, the reconstruction/sparsity error terms and a few
        statistics. It then defines the total loss, the activation update ops
        (``optimizerA0`` followed by ``optimizerA``), the dictionary update op
        (``optimizerW``), visualisation tensors and summary ops. Everything is
        stored on ``self``; nothing is returned.

        Args:
            inputShape: Indexable with three entries, used here as
                (Y size, X size, number of features) of one input image.

        Note:
            ``self.t_errorStd``, ``self.t_l1_mean`` and ``self.t_visRecon``
            are initialised to empty lists here and not filled by this method.
        """

        #Running on GPU
        with tf.device(self.device):
            with tf.name_scope("inputOps"):
                #Get convolution variables as placeholders
                self.imageShape = (self.batchSize, inputShape[0], inputShape[1], inputShape[2])
                self.inputImage = node_variable(self.imageShape, "inputImage")

            # Per-layer containers; index l refers to layer l
            self.V1_W = []
            self.normalize_W = []
            self.V1_A = []
            self.V1_Y = []
            self.oldA = []
            self.oldY = []
            self.randV1 = []
            self.resetV1 = []
            self.resetY = []

            self.recon = []
            self.error = []
            self.reconError = []
            self.sparseError = []
            self.scaledInput = []

            self.nnz = []
            self.errorStd = []
            self.l1_mean = []
            self.t_errorStd = []
            self.t_l1_mean = []
            self.log_V1_A = []

            self.WShape = []
            self.VShape = []
            self.inShape = []

            for l in range(self.numLayers):
                # Input feature count: image features for the first layer,
                # otherwise the number of elements of the layer below
                if l == 0:
                    numInF = inputShape[2]
                else:
                    numInF = self.numV[l-1]

                V_Y = float(inputShape[0])
                V_X = float(inputShape[1])

                # Apply the strides of layers 0..l in turn; after the loop
                # V_Y/V_X is the spatial size of layer l's activations and
                # V_*_Prev the spatial size of its input
                for i in range(l+1):
                    V_Y_Prev = V_Y
                    V_X_Prev = V_X
                    assert(int(V_Y) % self.VStrideY[i] == 0)
                    assert(int(V_X) % self.VStrideX[i] == 0)
                    V_Y = V_Y/self.VStrideY[i]
                    V_X = V_X/self.VStrideX[i]

                V_Y = int(V_Y)
                V_Y_Prev = int(V_Y_Prev)
                V_X = int(V_X)
                V_X_Prev = int(V_X_Prev)

                self.WShape.append((self.patchSizeY[l], self.patchSizeX[l], numInF, self.numV[l]))
                self.VShape.append((self.batchSize, V_Y, V_X, self.numV[l]))
                self.inShape.append((self.batchSize, V_Y_Prev, V_X_Prev, numInF))

                with tf.name_scope("Dictionary"):
                    self.V1_W.append(weight_variable_xavier(self.WShape[l], "V1_W"+str(l), conv=True))

                with tf.name_scope("weightNorm"):
                    # L2 norm over the first three weight axes, one value per
                    # output element; normalize_W rescales the weights in place
                    # NOTE: self.normVals is overwritten on every layer
                    self.normVals = tf.sqrt(tf.reduce_sum(tf.square(self.V1_W[l]), reduction_indices=[0, 1, 2], keep_dims=True))
                    self.normalize_W.append(self.V1_W[l].assign(self.V1_W[l]/(self.normVals+1e-8)))

                with tf.name_scope("FISTA"):
                    #Soft threshold
                    self.V1_A.append(weight_variable(self.VShape[l], "V1_A"+str(l), 1e-3))
                    self.V1_Y.append(weight_variable(self.VShape[l], "V1_Y"+str(l), 1e-3))

                    self.oldA.append(weight_variable(self.VShape[l], "oldA"+str(l), 1e-3))
                    self.oldY.append(weight_variable(self.VShape[l], "oldY"+str(l), 1e-3))

                    # NOTE: T, oldT and resetT are re-created on every layer
                    # iteration; only the last layer's objects stay on self.
                    # NOTE(review): the second positional argument of
                    # tf.Variable is `trainable`, so "T"/"oldT" are probably
                    # not used as names here - confirm against the TF version
                    # in use.
                    self.T = tf.Variable(1.0, "T")
                    self.oldT = tf.Variable(1.0, "oldT")

                    self.randV1.append(tf.truncated_normal(self.VShape[l], mean=0, stddev=1e-3))
                    #Reassign nodes
                    self.resetV1.append(self.V1_A[l].assign(self.randV1[l]))
                    self.resetY.append(self.V1_Y[l].assign(self.V1_A[l]))

                    # Only T is reset here; oldT is not
                    self.resetT = self.T.assign(1.0)

                with tf.name_scope("Recon"):
                    assert(self.VStrideY[l] >= 1)
                    assert(self.VStrideX[l] >= 1)
                    #We build index tensor in numpy to gather
                    # presumably conv2d_oneToMany maps activations back to the
                    # input shape (transposed convolution) - verify helper
                    self.recon.append(conv2d_oneToMany(self.V1_A[l], self.V1_W[l], self.inShape[l], "recon", self.VStrideY[l], self.VStrideX[l]))

                with tf.name_scope("Error"):
                    #Scale inputImage
                    # Scaling is currently disabled: the layer input is used
                    # as-is (image for layer 0, activations of the layer below
                    # otherwise)
                    if(l == 0):
                        #self.scaledInput.append(self.inputImage/np.sqrt(self.patchSizeX[0]*self.patchSizeY[0]*inputShape[2]))
                        self.scaledInput.append(self.inputImage)
                    else:
                        #self.scaledInput.append(self.V1_A[l-1]/np.sqrt(self.patchSizeX[l]*self.patchSizeY[l]*self.numV[l-1]))
                        self.scaledInput.append(self.V1_A[l-1])
                    self.error.append(self.scaledInput[l] - self.recon[l])

                with tf.name_scope("Loss"):
                    # Sum over axes 1-3, then mean over the batch axis
                    self.reconError.append(tf.reduce_mean(tf.reduce_sum(tf.square(self.error[l]), reduction_indices=[1, 2, 3])))
                    self.sparseError.append(tf.reduce_mean(tf.reduce_sum(tf.abs(self.V1_A[l]), reduction_indices=[1, 2, 3])))

                with tf.name_scope("stats"):
                    # Fraction of activations that are non-zero
                    self.nnz.append(tf.reduce_mean(tf.cast(tf.not_equal(self.V1_A[l], 0), tf.float32)))

                    eStd = tf.sqrt(tf.reduce_mean(tf.square(self.error[l] - tf.reduce_mean(self.error[l]))))
                    inStd = tf.sqrt(tf.reduce_mean(tf.square(self.scaledInput[l] - tf.reduce_mean(self.scaledInput[l]))))

                    # Std of the error relative to the std of the layer input
                    self.errorStd.append(eStd/inStd)

                    self.l1_mean.append(tf.reduce_mean(tf.abs(self.V1_A[l])))

                    #For log of activities
                    # The 1e-15 offset keeps the log finite for zero activations
                    self.log_V1_A.append(tf.log(tf.abs(self.V1_A[l])+1e-15))

            with tf.name_scope("Loss"):
                #Define loss
                # reconLoss = sum over layers of reconError/2;
                # loss = reconLoss + sum over layers of thresh * sparseError
                self.reconLoss = self.reconError[0]/2
                for l in range(1, self.numLayers):
                    self.reconLoss += self.reconError[l]/2

                self.loss = self.reconLoss
                for l in range(self.numLayers):
                    self.loss += self.thresh[l] * self.sparseError[l]

            with tf.name_scope("Opt"):
                ##Define optimizer
                #self.reconGrad = self.learningRateA * tf.gradients(self.reconLoss, self.V1_A)
                # Gradients are taken with respect to V1_A (one per layer)
                self.reconGrads = tf.gradients(self.reconLoss, self.V1_A)

                #Store old values in tensors
                #This is to avoid updating a variable too early to affect new values
                assignList = []
                for l in range(self.numLayers):
                    assignList.append(self.oldA[l].assign(self.V1_A[l]))
                    assignList.append(self.oldY[l].assign(self.V1_Y[l]))
                assignList.append(self.oldT.assign(self.T))
                self.optimizerA0 = tf.tuple(assignList)

                optimizerList = []

                # NOTE(review): the textbook FISTA step is
                # (1 + sqrt(1 + 4*t^2))/2; the inner "1 +" is absent here -
                # confirm whether that is intended.
                newT = (1+tf.sqrt(4*tf.square(self.oldT)))/2
                for l in range(self.numLayers):
                    # Thresholded gradient step on oldY.
                    # NOTE(review): the sign is taken from oldA rather than
                    # from the thresholded argument - confirm.
                    newA = tf.nn.relu(tf.abs(self.oldY[l] - self.learningRateA[l] * self.reconGrads[l]) - self.thresh[l]*self.learningRateA[l]) * tf.sign(self.oldA[l])
                    # Momentum extrapolation from the old to the new activation
                    newY = newA + ((self.oldT-1)/(newT+1e-8))*(newA-self.oldA[l])
                    #We update actual variables
                    optimizerList.append(self.V1_Y[l].assign(newY))
                    optimizerList.append(self.V1_A[l].assign(newA))
                optimizerList.append(self.T.assign(newT))

                self.optimizerA = tf.tuple(optimizerList)

                # One Adadelta optimizer per layer, each updating only that
                # layer's dictionary against the total loss
                optWList = []
                for l in range(self.numLayers):
                    optWList.append(tf.train.AdadeltaOptimizer(self.learningRateW[l], epsilon=1e-6).minimize(self.loss,
                            var_list=
                                [self.V1_W[l]]
                            ))

                self.optimizerW = tf.group(*optWList)


        # Built outside the tf.device block above
        with tf.name_scope("ReconVis"):
            self.visRecon = []
            self.t_visRecon = []
            for l in range(self.numLayers):
                outRecon = self.recon[l]
                for ll in range(l)[::-1]:
                    #We prob recons down layers
                    # Push layer l's reconstruction down through layers
                    # l-1 .. 0 using their dictionaries
                    outRecon = conv2d_oneToMany(outRecon, self.V1_W[ll], self.inShape[ll], "recon_"+str(l)+"_"+str(ll), self.VStrideY[ll], self.VStrideX[ll])
                self.visRecon.append(outRecon)

        with tf.name_scope("WeightVis"):
            self.visWeight = []

            for l in range(self.numLayers):
                # Move the output-element axis to the front so each element
                # is treated like one item of a batch
                outWeight = tf.transpose(self.V1_W[l], [3, 0, 1, 2])
                numN = self.WShape[l][3]
                numY = self.WShape[l][0]
                numX = self.WShape[l][1]
                numF = self.WShape[l][2]

                for ll in range(l)[::-1]:
                    # Spatial size grows with each lower layer's patch size
                    # and stride
                    numY = self.WShape[ll][0] + (numY-1) * self.VStrideY[ll]
                    numX = self.WShape[ll][1] + (numX-1) * self.VStrideX[ll]
                    numF = self.WShape[ll][2]
                    inShape = (numN, numY, numX, numF)
                    outWeight = conv2d_oneToMany(outWeight, self.V1_W[ll], inShape, "weight_"+str(l)+"_"+str(ll), self.VStrideY[ll], self.VStrideX[ll], padding="VALID")

                self.visWeight.append(outWeight)

        #Summaries
        self.s_loss = tf.scalar_summary('loss', self.loss, name="lossSum")
        self.h_input = tf.histogram_summary('inputImage', self.inputImage, name="input")

        # NOTE: the s_*/h_* attributes below are overwritten on every
        # iteration (h_input also replaces the one assigned just above), so
        # only the last layer's summary handles remain on self.
        for l in range(self.numLayers):
            self.s_recon = tf.scalar_summary('recon error' + str(l), self.reconError[l], name="reconError")
            self.s_errorStd= tf.scalar_summary('errorStd' + str(l), self.errorStd[l], name="errorStd")
            self.s_l1= tf.scalar_summary('l1 sparsity' + str(l), self.sparseError[l], name="sparseError")
            self.s_l1_mean = tf.scalar_summary('l1 mean' + str(l), self.l1_mean[l], name="l1Mean")
            self.s_s_nnz = tf.scalar_summary('nnz' + str(l), self.nnz[l], name="nnz")

            self.h_input = tf.histogram_summary('scaledInput'+str(l), self.scaledInput[l], name="input")
            self.h_recon = tf.histogram_summary('recon' + str(l), self.recon[l], name="recon")
            self.h_v1_w = tf.histogram_summary('V1_W' + str(l), self.V1_W[l], name="V1_W")
            self.h_v1_a = tf.histogram_summary('V1_A' + str(l), self.V1_A[l], name="V1_A")
            self.h_log_v1_a = tf.histogram_summary('Log_V1_A' + str(l), self.log_V1_A[l], name="Log_V1_A")
Example #26
0
def train():
    """Train for a number of steps.

    Builds one model tower per GPU (``FLAGS.num_gpus``) with shared
    variables, averages the tower gradients, applies them together with an
    exponential moving average of the trainable variables, and then runs
    ``FLAGS.max_steps`` training steps. Throughput and accuracy are logged
    every 10 steps; accuracy is saved and summaries are written every 100
    steps. Checkpointing and evaluation are delegated to
    ``maybe_save_model`` and ``maybe_submit_evaluation_job``.
    """
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # Create a variable to count the number of train() calls. This equals the
        # number of batches processed * FLAGS.num_gpus.
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0), trainable=False)

        # Decay the learning rate exponentially based on the number of steps.
        lr = create_learning_rate_scheduler(global_step, dataset=MTVSOData(subset='train'))

        # Create an optimizer that performs gradient descent.
        opt = create_optimizer(lr)

        # Calculate the gradients for each model tower.
        tower_grads, tower_logits, tower_labels, tower_losses = [], [], [], []
        reuse = None
        # tf.variable_scope outside the loop is needed for the code to work on TensorFlow versions >=0.12
        # https://github.com/tensorflow/tensorflow/issues/6220#issuecomment-266425068
        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(FLAGS.num_gpus):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('%s_%d' % ('tower', i)) as scope:
                        # Calculate the loss for one tower. This function constructs
                        # the entire model but shares the variables across all towers.
                        loss, logits, labels = tower_loss(scope, reuse)

                        # Reuse variables for the next tower.
                        reuse = True

                        # Retain the summaries from the final tower.
                        summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)

                        # Calculate the gradients for the batch of data on this tower.
                        grads = opt.compute_gradients(loss, var_list=get_variables(["visual_fc", "linear_anp", "fusion"]))

                        # Keep track of the gradients across all towers.
                        tower_grads.append(grads)
                        tower_logits.append(logits)
                        tower_labels.append(labels)
                        tower_losses.append(loss)

        # Concatenate the outputs of all towers
        logits_op = concat(tower_logits, 0, 'concat_logits')
        labels_op = concat(tower_labels, 0, 'concat_labels')
        loss_op = tf.reduce_mean(tower_losses)

        # Update BN's moving_mean and moving_variance
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if update_ops:
            tf.logging.info('Gathering update_ops')
            with tf.control_dependencies(tf.tuple(update_ops)):
                loss_op = tf.identity(loss_op)

        # Track the loss of all towers
        summaries.append(tf_.scalar_summary('combined_loss', loss_op))

        # Compute top-1 accuracy
        top1_accuracy_op = top_k_accuracy(logits_op, labels_op, k=1)

        # Compute top-5 accuracy
        top5_accuracy_op = top_k_accuracy(logits_op, labels_op, k=5)

        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers.
        grads = average_gradients(tower_grads)

        # Add a summary to track the learning rate.
        summaries.append(tf_.scalar_summary('learning_rate', lr))

        # Add histograms for trainable variables and gradients.
        maybe_track_vars_and_gradients(grads, summaries)

        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

        # Track the moving averages of all trainable variables.
        variable_averages = tf.train.ExponentialMovingAverage(FLAGS.moving_average_decay, global_step)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())

        # Group all updates to into a single train op.
        train_op = tf.group(apply_gradient_op, variables_averages_op)

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()

        # Start running operations on the Graph. allow_soft_placement must be set to
        # True to build towers on GPU, as some of the ops do not have GPU implementations.
        sess = tf.InteractiveSession(config=tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=FLAGS.log_device_placement))

        sess.run(init)

        if FLAGS.resume_training:
            # Restore model weights in the case that we are resuming training
            restore_model(sess, saver)
        else:
            # If it is not resuming training, simply load the weights of the noun and adjective resnet
            restore_model(sess, saver, current_scope="resnet_nouns_v1_50", checkpoint_scope='resnet_v1_50')
            restore_model(sess, saver, current_scope="resnet_adjectives_v1_50", checkpoint_scope='resnet_v1_50')

        # Manually set the learning rate if there is no learning rate decay and we are resuming training
        overwrite_learning_rate(sess, lr)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        # Build the summary operation from the last tower summaries. This is
        # done once, outside the training loop: merging inside the loop would
        # add a new op to the graph every time summaries are written.
        summary_op = tf_.merge_summary(summaries)

        summary_writer = tf_.summary_writer(FLAGS.train_dir, sess.graph)
        accumulated_top1_accuracy_10_steps, accumulated_top1_accuracy_100_steps = 0., 0.
        accumulated_top5_accuracy_10_steps, accumulated_top5_accuracy_100_steps = 0., 0.

        for step in range(FLAGS.max_steps):
            # Read before the train op runs, so this is the pre-update value
            g_step = global_step.eval()
            start_time = time.time()
            _, loss_value, top1_accuracy_value, top5_accuracy_value = sess.run([train_op, loss_op,
                                                                                top1_accuracy_op,
                                                                                top5_accuracy_op])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            accumulated_top1_accuracy_10_steps += top1_accuracy_value
            accumulated_top1_accuracy_100_steps += top1_accuracy_value
            accumulated_top5_accuracy_10_steps += top5_accuracy_value
            accumulated_top5_accuracy_100_steps += top5_accuracy_value

            # The first step is slower since we have to wait until the examples queue has over min_examples
            # so we will not log the throughput at step 0
            if step == 0:
                continue

            if step % 10 == 0:
                num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = duration / FLAGS.num_gpus

                format_str = '%s: step %d, loss = %.2f, top-1 = %.3f%%, top-5 = %.3f%% ' \
                             '(%.1f examples/sec; %.3f sec/batch)'
                # Sum over ~10 steps times 10 gives an average percentage
                tf.logging.info(format_str % (datetime.datetime.now(), g_step, loss_value,
                                              accumulated_top1_accuracy_10_steps * 10,
                                              accumulated_top5_accuracy_10_steps * 10,
                                              examples_per_sec, sec_per_batch))
                accumulated_top1_accuracy_10_steps = 0.
                accumulated_top5_accuracy_10_steps = 0.

            if step % 100 == 0:
                save_accuracy(g_step, accumulated_top1_accuracy_100_steps,
                              accumulated_top5_accuracy_100_steps)

                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, g_step - 1)

                accumulated_top1_accuracy_100_steps = 0.
                accumulated_top5_accuracy_100_steps = 0.

            # Save the model checkpoint periodically.
            maybe_save_model(sess, saver, step, global_step)

            # Evaluate the model periodically
            maybe_submit_evaluation_job(step)
Example #27
0
                        )).batch(1).prefetch(2)
    test_dataset = tf.data.Dataset.from_tensor_slices(test).map(
        lambda f: tuple(tf.py_func(parse_fn, [f], [tf.float64, tf.float64])
                        )).batch(1).repeat()

    train_iterator = train_dataset.make_one_shot_iterator()
    test_iterator = test_dataset.make_one_shot_iterator()

    handle = tf.placeholder(tf.string, shape=[])
    iter = tf.data.Iterator.from_string_handle(handle,
                                               train_dataset.output_types,
                                               train_dataset.output_shapes)

    next_el = iter.get_next()
    next_el = tf.tuple(
        [tf.squeeze(next_el[0], [0]),
         tf.squeeze(next_el[1], [0])])

    train_handle, test_handle = sess.run(
        [train_iterator.string_handle(),
         test_iterator.string_handle()])
    # initialize the iterator
    # sess.run([test_iterator.initializer])

    # simulate training
    for i in range(EPOCHS):
        if i % 3 == 0:
            # run validation
            out = sess.run(next_el, feed_dict={handle: test_handle})
            print("test out: {}".format(out))
        try:
Example #28
0
def build_graph():
    """Build the generator/critic training graph from module-level settings.

    Reads the module globals ``batch_size``, ``z_dim``, ``channel``,
    ``is_mlp``, ``is_adam``, ``mode``, ``lam``, the two learning rates and,
    in 'regular' mode, ``clamp_lower``/``clamp_upper``.

    In 'gp' mode a gradient penalty weighted by ``lam`` is added to the
    critic loss. In 'regular' mode the critic variables are clipped to
    ``[clamp_lower, clamp_upper]`` after every critic update.

    Returns:
        A tuple ``(opt_g, opt_c, real_data)``: the generator train op, the
        critic train op (in 'regular' mode the list returned by ``tf.tuple``
        over the clip assignments, which depend on the critic update) and the
        placeholder for a batch of real images.

    Raises:
        NotImplementedError: If ``mode`` is neither 'gp' nor 'regular'.
    """
    # Validate first so an unsupported mode fails before any op is created.
    if mode not in ('gp', 'regular'):
        raise NotImplementedError('Only two modes')

    noise_dist = tf.contrib.distributions.Normal(0., 1.)
    z = noise_dist.sample((batch_size, z_dim))
    generator = generator_mlp if is_mlp else generator_conv
    critic = critic_mlp if is_mlp else critic_conv
    with tf.variable_scope('generator'):
        train = generator(z)
    real_data = tf.placeholder(dtype=tf.float32,
                               shape=(batch_size, 32, 32, channel))
    true_logit = critic(real_data)
    fake_logit = critic(train, reuse=True)
    c_loss = tf.reduce_mean(fake_logit - true_logit)
    # Compare strings by value; `is` only tests object identity and is not
    # guaranteed to hold for equal strings.
    if mode == 'gp':
        alpha_dist = tf.contrib.distributions.Uniform(low=0., high=1.)
        alpha = alpha_dist.sample((batch_size, 1, 1, 1))
        # Random points on the segments between real and generated samples
        interpolated = real_data + alpha * (train - real_data)
        inte_logit = critic(interpolated, reuse=True)
        gradients = tf.gradients(inte_logit, [
            interpolated,
        ])[0]
        grad_l2 = tf.sqrt(tf.reduce_sum(tf.square(gradients), axis=[1, 2, 3]))
        gradient_penalty = tf.reduce_mean((grad_l2 - 1)**2)
        # Summary ops are created for their side effect of being registered
        tf.summary.scalar("gp_loss", gradient_penalty)
        tf.summary.scalar("grad_norm", tf.nn.l2_loss(gradients))
        c_loss += lam * gradient_penalty
    g_loss = tf.reduce_mean(-fake_logit)
    tf.summary.scalar("g_loss", g_loss)
    tf.summary.scalar("c_loss", c_loss)
    tf.summary.image("img", train, max_outputs=10)
    theta_g = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                scope='generator')
    theta_c = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                scope='critic')
    # Same optimizer factory for generator and critic
    if is_adam is True:
        optimizer = partial(tf.train.AdamOptimizer, beta1=0.5, beta2=0.9)
    else:
        optimizer = tf.train.RMSPropOptimizer
    counter_g = tf.Variable(trainable=False, initial_value=0, dtype=tf.int32)
    opt_g = ly.optimize_loss(
        loss=g_loss,
        learning_rate=learning_rate_ger,
        optimizer=optimizer,
        variables=theta_g,
        global_step=counter_g,
        summaries=['gradient_norm'])
    counter_c = tf.Variable(trainable=False, initial_value=0, dtype=tf.int32)
    opt_c = ly.optimize_loss(
        loss=c_loss,
        learning_rate=learning_rate_dis,
        optimizer=optimizer,
        variables=theta_c,
        global_step=counter_c,
        summaries=['gradient_norm'])
    if mode == 'regular':
        clipped_var_c = [
            tf.assign(var, tf.clip_by_value(var, clamp_lower, clamp_upper))
            for var in theta_c
        ]
        # merge the clip operations on critic variables
        with tf.control_dependencies([opt_c]):
            opt_c = tf.tuple(clipped_var_c)
    return opt_g, opt_c, real_data
Example #29
0
    def _build_train_graph(self):
        """Create the WGAN training graph under the ``self.name`` scope.

        Defines the input placeholders, generator and critic, their losses,
        RMSProp train ops (the critic op is followed by weight clipping to
        [-0.01, 0.01]) and the summary ops, then exposes the entry points on
        ``self``: ``X``, ``z``, ``D_train_op``, ``G_train_op``,
        ``fake_sample``, ``global_step``, ``summary_op`` and
        ``all_summary_op``.
        """
        with tf.variable_scope(self.name):
            real_input = tf.placeholder(tf.float32, [None] + self.shape)
            noise_input = tf.placeholder(tf.float32, [None, self.z_dim])
            step_counter = tf.Variable(0, name='global_step', trainable=False)

            generated = self._generator(noise_input)
            critic_on_real = self._critic(real_input)
            critic_on_fake = self._critic(generated, reuse=True)

            wasserstein_dist = tf.reduce_mean(critic_on_real - critic_on_fake)
            critic_loss = -wasserstein_dist
            generator_loss = tf.reduce_mean(-critic_on_fake)

            critic_scope = self.name + '/critic/'
            generator_scope = self.name + '/generator/'

            critic_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                            scope=critic_scope)
            generator_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope=generator_scope)

            critic_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                               scope=critic_scope)
            generator_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                                  scope=generator_scope)

            # The paper trains the critic n_critic times per step; here the
            # critic learning rate is scaled by n_critic instead.
            with tf.control_dependencies(critic_updates):
                critic_optimizer = tf.train.RMSPropOptimizer(
                    learning_rate=self.D_lr * self.n_critic)
                critic_step = critic_optimizer.minimize(critic_loss,
                                                        var_list=critic_vars)
            with tf.control_dependencies(generator_updates):
                generator_optimizer = tf.train.RMSPropOptimizer(
                    learning_rate=self.G_lr)
                generator_step = generator_optimizer.minimize(
                    generator_loss,
                    var_list=generator_vars,
                    global_step=step_counter)

            # Weight clipping: every critic variable is clipped, including
            # batch-norm gamma (open question from the author: is clipping
            # gamma right?). Author's notes on alternatives: skipping gamma
            # entirely "does not work"; clipping gamma to [-1, 1] instead
            # "works but strange".
            clip_ops = []
            for critic_var in critic_vars:
                clipped_value = tf.clip_by_value(critic_var, -0.01, 0.01)
                clip_ops.append(tf.assign(critic_var, clipped_value))

            # The clip assignments only run after the critic update.
            with tf.control_dependencies([critic_step]):
                critic_step = tf.tuple(clip_ops)

            # Per-step summary: the three scalar losses.
            g_loss_summary = tf.summary.scalar('G_loss', generator_loss)
            c_loss_summary = tf.summary.scalar('C_loss', critic_loss)
            w_dist_summary = tf.summary.scalar('W_dist', wasserstein_dist)
            self.summary_op = tf.summary.merge(
                [g_loss_summary, c_loss_summary, w_dist_summary])

            # Sparse-step summary: the scalars above plus generated images.
            tf.summary.image('fake_sample',
                             generated,
                             max_outputs=self.FAKE_MAX_OUTPUT)
            self.all_summary_op = tf.summary.merge_all()

            # Publicly accessible graph nodes.
            self.X = real_input
            self.z = noise_input
            # Named D_train_op for compatibility with train.py.
            self.D_train_op = critic_step
            self.G_train_op = generator_step
            self.fake_sample = generated
            self.global_step = step_counter
Example #30
0
# Input pipeline: a dataset over the training index pairs, where each element
# is turned into a tf.double tensor by input_parser (run through tf.py_func).
train_pairs = tf.constant(ind_pairs_train)
tr_data = tf.data.Dataset.from_tensor_slices(train_pairs)
# Parsing runs with 12 parallel calls.
tr_data = tr_data.map(lambda pair: tf.py_func(input_parser, [pair], tf.double),
                      num_parallel_calls=12)
tr_data = tr_data.batch(batchsize)
# Note: prefetch is applied after batch, so it buffers `batchsize` batches.
tr_data = tr_data.prefetch(batchsize)

# Re-initializable iterator defined by structure only; tr_init_op binds it to
# the training dataset when run.
iterator = tf.data.Iterator.from_structure(tr_data.output_types,
                                           tr_data.output_shapes)
next_element = iterator.get_next()

tr_init_op = iterator.make_initializer(tr_data)
# Split each batch into three equal parts along axis 3.
# presumably the three images of a triplet stacked on the channel axis -
# verify against input_parser
im1, im2, im3 = tf.split(next_element, 3, 3)
triplet_batch = tf.tuple((im1, im2, im3))

# --------------------------------------------------
print('model')
# --------------------------------------------------

from Models import Model

model = Model(nchannels, imcropsize, testIdx)
print('reslearn: ', model.residualLearning)

# --------------------------------------------------
print('train')
# --------------------------------------------------

saver = tf.train.Saver()
Example #31
0
    def _inference(self, memories, sentences, answers, keep_prob, mem_idx,
                   sent_lexical_features, mem_lexical_features):
        """Build the tagging graph: shared BiGRU encoder, memory cell and CRF.

        Memories and sentences are embedded with the same table
        (``self._emb``), concatenated with their lexical features and encoded
        by one bidirectional GRU whose weights are shared between both inputs.
        The encoded sentence is then run through a ``MemoryNetworkNERCell``
        over the encoded memories, projected to tag scores and scored with a
        CRF log-likelihood.

        Args:
            memories: token ids of the memory, embedded via ``self._emb``.
            sentences: token ids of the sentence, embedded via ``self._emb``.
            answers: gold tag indices handed to ``crf_log_likelihood``.
            keep_prob: keep probability used for both the input and the
                output dropout of the GRU cells.
            mem_idx: per-token index fed to the memory cell next to the
                sentence encoding (a trailing axis is added here).
            sent_lexical_features: features concatenated to the sentence
                embeddings on axis 2.
            mem_lexical_features: features concatenated to the memory
                embeddings on axis 2.

        Returns:
            Tuple ``(sent_len, mlp2tag, log_likelihood, transition_params,
            mem_rnn_link)``.
        """
        with tf.variable_scope(self._name):
            memory_rnn_cell_fw = tf.contrib.rnn.GRUCell(
                self._rnn_memory_hidden_size)
            memory_rnn_cell_fw = tf.contrib.rnn.DropoutWrapper(
                memory_rnn_cell_fw,
                input_keep_prob=keep_prob,
                output_keep_prob=keep_prob)
            memory_rnn_cell_bw = tf.contrib.rnn.GRUCell(
                self._rnn_memory_hidden_size)
            memory_rnn_cell_bw = tf.contrib.rnn.DropoutWrapper(
                memory_rnn_cell_bw,
                input_keep_prob=keep_prob,
                output_keep_prob=keep_prob)

            mem_len = self._seq_len(memories)
            # [None]
            sent_len = self._seq_len(sentences)
            # [None]

            sent_emb = tf.nn.embedding_lookup(self._emb, sentences)
            # [None, sentence_size, emb_size]
            m_emb = tf.nn.embedding_lookup(self._emb, memories)
            # [None, memory_size, emb_size]

            sent_emb = tf.concat(values=[sent_emb, sent_lexical_features],
                                 axis=2)
            # [None, sentence_size, emb_size + lexical_features_size]
            m_emb = tf.concat(values=[m_emb, mem_lexical_features], axis=2)
            # [None, memory_size, emb_size + lexical_features_size]

            with tf.variable_scope("memory_rnn") as m_sentence_rnn_scope:
                (m_rnn_fw, m_rnn_bw), (_, _) = tf.nn.bidirectional_dynamic_rnn(
                    memory_rnn_cell_fw,
                    memory_rnn_cell_bw,
                    m_emb,
                    dtype=tf.float32,
                    sequence_length=mem_len,
                    scope=m_sentence_rnn_scope,
                    swap_memory=True,
                )
                # m_rnn_f/bw: [None, memory_size, rnn_memory_hidden_size]

                Wm_memory_rnn_fw = tf.get_variable(
                    initializer=self._init,
                    shape=self._rnn_memory_Ws_shape,
                    name="W_memory_rnn_fw",
                )
                Wm_memory_rnn_bw = tf.get_variable(
                    initializer=self._init,
                    shape=self._rnn_memory_Ws_shape,
                    name="W_memory_rnn_bw",
                )
                bm_memory_rnn = tf.get_variable(
                    initializer=self._init,
                    shape=self._rnn_memory_bs_shape,
                    name="b_memory_rnn")
                # Merge both directions into a single memory representation.
                m = self._nonlin(
                    self._tensor_dot(m_rnn_fw, Wm_memory_rnn_fw) +
                    self._tensor_dot(m_rnn_bw, Wm_memory_rnn_bw) +
                    bm_memory_rnn)
                # [None, memory_size, emb_size]

                # The sentence projection variables must be created before
                # the scope is switched to reuse mode below; afterwards
                # tf.get_variable can no longer create new variables here.
                W_sent_rnn_fw = tf.get_variable(
                    initializer=self._init,
                    shape=self._rnn_memory_Ws_shape,
                    name="W_sentence_rnn_fw",
                )
                W_sent_rnn_bw = tf.get_variable(
                    initializer=self._init,
                    shape=self._rnn_memory_Ws_shape,
                    name="W_sentence_rnn_bw",
                )
                b_sent_rnn = tf.get_variable(initializer=self._init,
                                             shape=self._rnn_memory_bs_shape,
                                             name="b_sentence_rnn")

                # Encode the sentence with the very same GRU weights that
                # encoded the memory.
                m_sentence_rnn_scope.reuse_variables()
                (sent_rnn_fw,
                 sent_rnn_bw), _ = tf.nn.bidirectional_dynamic_rnn(
                     memory_rnn_cell_fw,
                     memory_rnn_cell_bw,
                     sent_emb,
                     dtype=tf.float32,
                     sequence_length=sent_len,
                     scope=m_sentence_rnn_scope,
                     swap_memory=True,
                 )
                # sent_rnn_f/bw: [None, sentence_size, rnn_memory_hidden_size]
                sent_rnn_output = self._nonlin(
                    self._tensor_dot(sent_rnn_fw, W_sent_rnn_fw) +
                    self._tensor_dot(sent_rnn_bw, W_sent_rnn_bw) + b_sent_rnn)
                # [None, sentence_size, emb_size]
            sent_emb = sent_rnn_output

            # The encoded memory `m` is passed for both memory arguments of
            # the cell.
            mem_rnn_cell = MemoryNetworkNERCell(
                self._memory_size,
                self._embedding_feature_size,
                m,
                m,
                return_link=True,
            )

            mem_idx_expanded = tf.expand_dims(input=mem_idx,
                                              axis=-1,
                                              name="doc_start_index_reshaped")

            (mem_rnn_output, mem_rnn_link), _ = tf.nn.dynamic_rnn(
                mem_rnn_cell,
                tf.tuple([sent_emb, mem_idx_expanded]),
                dtype=tf.float32,
                sequence_length=sent_len)
            # mem_rnn_output: [None, max_seq_len, hidden_size]
            # mem_rnn_link: [None, max_seq_len, max_seq_len]

            rnn2mlp = self._tensor_dot(mem_rnn_output, self.RNN) + self.RNN_b
            # [None, sentence_size, mlp_hidden_size]
            mlp2tag = self._tensor_dot(rnn2mlp, self.RNN2TAG) + self.RNN2TAG_b
            # [None, sentence_size, answer_size]

            log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(
                mlp2tag, answers, sent_len)

            return sent_len, mlp2tag, log_likelihood, transition_params, mem_rnn_link
Example #32
0
	def train(imPath,logPath,modelPath,pmPath,nTrain,nValid,nTest,restoreVariables,nSteps,gpuIndex,testPMIndex):
		"""Train the network, validate at every step and write test outputs.

		Images are read from '<imPath>/I%05d_Img.tif' and annotations from
		'<imPath>/I%05d_Ant.tif'; the nTrain+nValid+nTest samples are randomly
		partitioned into train / validation / test sets.

		Args:
			imPath: directory holding the image and annotation TIFFs.
			logPath: summary directory; it is deleted if it exists and
				re-created with 'Train' and 'Valid' sub-directories.
			modelPath: directory for 'model.ckpt', the dataset mean / st dev
				files and 'hp.data'.
			pmPath: directory receiving one PNG per test sample.
			nTrain, nValid, nTest: number of samples in each split.
			restoreVariables: when true, load the stored dataset statistics
				and restore the checkpoint instead of initializing variables.
			nSteps: number of training steps (one batch per step).
			gpuIndex: value written to CUDA_VISIBLE_DEVICES.
			testPMIndex: class channel of the output / labels written to the
				test PNGs.
		"""
		os.environ['CUDA_VISIBLE_DEVICES']= '%d' % gpuIndex

		outLogPath = logPath
		trainWriterPath = pathjoin(logPath,'Train')
		validWriterPath = pathjoin(logPath,'Valid')
		outModelPath = pathjoin(modelPath,'model.ckpt')
		outPMPath = pmPath

		batchSize = UNet2D.hp['batchSize']
		imSize = UNet2D.hp['imSize']
		nChannels = UNet2D.hp['nChannels']
		nClasses = UNet2D.hp['nClasses']

		# --------------------------------------------------
		# data
		# --------------------------------------------------

		Train = np.zeros((nTrain,imSize,imSize,nChannels))
		Valid = np.zeros((nValid,imSize,imSize,nChannels))
		Test = np.zeros((nTest,imSize,imSize,nChannels))
		LTrain = np.zeros((nTrain,imSize,imSize,nClasses))
		LValid = np.zeros((nValid,imSize,imSize,nClasses))
		LTest = np.zeros((nTest,imSize,imSize,nClasses))

		print('loading data, computing mean / st dev')
		if not os.path.exists(modelPath):
			os.makedirs(modelPath)
		if restoreVariables:
			datasetMean = loadData(pathjoin(modelPath,'datasetMean.data'))
			datasetStDev = loadData(pathjoin(modelPath,'datasetStDev.data'))
		else:
			# statistics are the averages of the per-image mean and st dev
			datasetMean = 0
			datasetStDev = 0
			for iSample in range(nTrain+nValid+nTest):
				I = im2double(tifread('%s/I%05d_Img.tif' % (imPath,iSample)))
				datasetMean += np.mean(I)
				datasetStDev += np.std(I)
			datasetMean /= (nTrain+nValid+nTest)
			datasetStDev /= (nTrain+nValid+nTest)
			saveData(datasetMean, pathjoin(modelPath,'datasetMean.data'))
			saveData(datasetStDev, pathjoin(modelPath,'datasetStDev.data'))

		perm = np.arange(nTrain+nValid+nTest)
		np.random.shuffle(perm)

		def loadSplit(data, labels, indices):
			# fill data / labels with the normalized image (channel 0) and the
			# one-hot annotation (class c <-> annotation value c+1) of each index
			for iSample, index in enumerate(indices):
				im = im2double(tifread('%s/I%05d_Img.tif' % (imPath,index)))
				data[iSample,:,:,0] = (im-datasetMean)/datasetStDev
				im = tifread('%s/I%05d_Ant.tif' % (imPath,index))
				for iClass in range(nClasses):
					labels[iSample,:,:,iClass] = (im == iClass+1)

		loadSplit(Train, LTrain, perm[:nTrain])
		loadSplit(Valid, LValid, perm[nTrain:nTrain+nValid])
		loadSplit(Test, LTest, perm[nTrain+nValid:nTrain+nValid+nTest])

		# --------------------------------------------------
		# optimization
		# --------------------------------------------------

		tfLabels = tf.placeholder("float", shape=[None,imSize,imSize,nClasses],name='labels')

		globalStep = tf.Variable(0,trainable=False)
		learningRate0 = 0.01
		decaySteps = 1000
		decayRate = 0.95
		learningRate = tf.train.exponential_decay(learningRate0,globalStep,decaySteps,decayRate,staircase=True)

		with tf.name_scope('optim'):
			# cross entropy: sum over the class axis, mean over everything else
			loss = tf.reduce_mean(-tf.reduce_sum(tf.multiply(tfLabels,tf.log(UNet2D.nn)),3))
			updateOps = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
			optimizer = tf.train.MomentumOptimizer(learningRate,0.9)
			# run the collected update ops together with every optimization step
			with tf.control_dependencies(updateOps):
				optOp = optimizer.minimize(loss,global_step=globalStep)

		with tf.name_scope('eval'):
			# per class: 1 - (pixels labeled and predicted as the class) / (pixels labeled as the class)
			error = []
			for iClass in range(nClasses):
				labels0 = tf.reshape(tf.to_int32(tf.slice(tfLabels,[0,0,0,iClass],[-1,-1,-1,1])),[batchSize,imSize,imSize])
				predict0 = tf.reshape(tf.to_int32(tf.equal(tf.argmax(UNet2D.nn,3),iClass)),[batchSize,imSize,imSize])
				correct = tf.multiply(labels0,predict0)
				nCorrect0 = tf.reduce_sum(correct)
				nLabels0 = tf.reduce_sum(labels0)
				error.append(1-tf.to_float(nCorrect0)/tf.to_float(nLabels0))
			errors = tf.tuple(error)

		# --------------------------------------------------
		# inspection
		# --------------------------------------------------

		with tf.name_scope('scalars'):
			tf.summary.scalar('avg_cross_entropy', loss)
			for iClass in range(nClasses):
				tf.summary.scalar('avg_pixel_error_%d' % iClass, error[iClass])
			tf.summary.scalar('learning_rate', learningRate)
		with tf.name_scope('images'):
			split0 = tf.slice(UNet2D.nn,[0,0,0,0],[-1,-1,-1,1])
			split1 = tf.slice(UNet2D.nn,[0,0,0,1],[-1,-1,-1,1])
			if nClasses > 2:
				split2 = tf.slice(UNet2D.nn,[0,0,0,2],[-1,-1,-1,1])
			tf.summary.image('pm0',split0)
			tf.summary.image('pm1',split1)
			if nClasses > 2:
				tf.summary.image('pm2',split2)
		merged = tf.summary.merge_all()


		# --------------------------------------------------
		# session
		# --------------------------------------------------

		saver = tf.train.Saver()
		sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) # config parameter needed to save variables when using GPU
		trainWriter = None
		validWriter = None
		try:
			if os.path.exists(outLogPath):
				shutil.rmtree(outLogPath)
			trainWriter = tf.summary.FileWriter(trainWriterPath, sess.graph)
			validWriter = tf.summary.FileWriter(validWriterPath, sess.graph)

			if restoreVariables:
				saver.restore(sess, outModelPath)
				print("Model restored.")
			else:
				sess.run(tf.global_variables_initializer())

			# --------------------------------------------------
			# train
			# --------------------------------------------------

			batchData = np.zeros((batchSize,imSize,imSize,nChannels))
			batchLabels = np.zeros((batchSize,imSize,imSize,nClasses))
			for i in range(nSteps):
				# train: a fresh random batch at every step

				perm = np.arange(nTrain)
				np.random.shuffle(perm)

				for j in range(batchSize):
					batchData[j,:,:,:] = Train[perm[j],:,:,:]
					batchLabels[j,:,:,:] = LTrain[perm[j],:,:,:]

				summary,_ = sess.run([merged,optOp],feed_dict={UNet2D.tfData: batchData, tfLabels: batchLabels, UNet2D.tfTraining: 1})
				trainWriter.add_summary(summary, i)

				# validation: a random validation batch, same buffers reused

				perm = np.arange(nValid)
				np.random.shuffle(perm)

				for j in range(batchSize):
					batchData[j,:,:,:] = Valid[perm[j],:,:,:]
					batchLabels[j,:,:,:] = LValid[perm[j],:,:,:]

				summary, es = sess.run([merged, errors],feed_dict={UNet2D.tfData: batchData, tfLabels: batchLabels, UNet2D.tfTraining: 0})
				validWriter.add_summary(summary, i)

				e = np.mean(es)
				print('step %05d, e: %f' % (i,e))

				if i == 0:
					# a restored model must be beaten before it is overwritten
					if restoreVariables:
						lowestError = e
					else:
						lowestError = np.inf

				# checkpoints are only considered every 100 steps
				if np.mod(i,100) == 0 and e < lowestError:
					lowestError = e
					print("Model saved in file: %s" % saver.save(sess, outModelPath))


			# --------------------------------------------------
			# test
			# --------------------------------------------------

			if not os.path.exists(outPMPath):
				os.makedirs(outPMPath)

			for i in range(nTest):
				j = np.mod(i,batchSize)

				batchData[j,:,:,:] = Test[i,:,:,:]
				batchLabels[j,:,:,:] = LTest[i,:,:,:]

				# run the network once the batch is full or the test set is
				# exhausted; only the first j+1 entries are valid in the latter case
				if j == batchSize-1 or i == nTest-1:

					output = sess.run(UNet2D.nn,feed_dict={UNet2D.tfData: batchData, tfLabels: batchLabels, UNet2D.tfTraining: 0})

					for k in range(j+1):
						pm = output[k,:,:,testPMIndex]
						gt = batchLabels[k,:,:,testPMIndex]
						im = np.sqrt(normalize(batchData[k,:,:,0]))
						# image | network output | ground truth, side by side
						imwrite(np.uint8(255*np.concatenate((im,np.concatenate((pm,gt),axis=1)),axis=1)),'%s/I%05d.png' % (outPMPath,i-j+k+1))


			# --------------------------------------------------
			# save hyper-parameters, clean-up
			# --------------------------------------------------

			saveData(UNet2D.hp,pathjoin(modelPath,'hp.data'))
		finally:
			# release the event files and the session even when a step fails
			for writer in (trainWriter, validWriter):
				if writer is not None:
					writer.close()
			sess.close()
Example #33
0
    def fit(
        self,
        ids_train,
        ids_test,
        y_train,
        y_test,
        dense_train=None,
        dense_test=None,
        lr=0.001,
        N_EPOCH=50,
        batch_size=200,
        early_stopping_rounds=20,
    ):
        """Build the prediction graph, train it and restore the best weights.

        The graph is assembled from whichever components are enabled on the
        instance (LR, MLR, one FM variant, AutoInt, CrossNet / MLP), then
        trained with Adam. After every epoch the test set is evaluated; the
        trainable variables of the best epoch are kept and written back
        before returning.

        Args:
            ids_train: categorical id features for training; must support
                ``.copy()``, ``.columns``, ``.loc`` and ``.shape``
                (presumably a pandas DataFrame -- confirm against callers).
            ids_test: same as ``ids_train`` for the evaluation set.
            y_train: training labels, passed to ``batcher`` and, when a
                metric is configured, to ``roc_auc_score``.
            y_test: evaluation labels.
            dense_train: optional dense features for training.
            dense_test: optional dense features for evaluation.
            lr: learning rate of the Adam optimizer.
            N_EPOCH: maximum number of epochs.
            batch_size: batch size handed to ``batcher``.
            early_stopping_rounds: stop once the score has not improved for
                this many epochs.

        Returns:
            The best evaluation score: the mean test loss, or the test AUC
            when ``self.metric_type`` is set.

        Raises:
            AssertionError: if no prediction component is enabled, or if a
                metric other than 'auc' / a non-binary loss is combined with
                ``metric_type``.
            Exception: if ``self.loss_type`` is not supported.
        """
        start_time = time.time()
        # Work on copies so the caller's id frames are not mutated below.
        ids_train = ids_train.copy()
        ids_test = ids_test.copy()

        self.batch_size = batch_size
        # Data preprocessing: the encoded ids of every feature must start
        # where the ids of the previous feature end, so each column is
        # shifted by the sizes of all preceding features. The inputs are
        # expected to start from 0 for every feature.
        if self.hash_size is None:
            for i, column in enumerate(ids_train.columns):
                if i >= 1:
                    ids_train.loc[:, column] = ids_train[column] + sum(
                        self.features_sizes[:i])
                    ids_test.loc[:, column] = ids_test[column] + sum(
                        self.features_sizes[:i])
        if self.attention_FM or self.use_AutoInt:
            # Stored on the instance; used by get_attention to fetch the
            # attention values.
            self.ids_train, self.ids_test, self.y_train, self.y_test = ids_train, ids_test, y_train, y_test

        self.ids = tf.placeholder(tf.int32, [None, self.fields])
        self.dense_inputs = tf.placeholder(tf.float32,
                                           [None, self.dense_features_size])
        self.y = tf.placeholder(tf.float32, [None, 1])
        self.L2_reg = 0

        self.dropout_keeprate_holder = tf.placeholder(tf.float32)

        embed_L2 = 0
        if self.use_FM or self.use_MLP or self.use_AutoInt:
            self.embedding, embed_L2 = self.Embedding(
                self.ids, self.embedding_weights)  #(None,fields,k)

        if self.use_SE:
            self.embeddingSE = self.SELayer(self.embedding, self.SE_weights)

        # Stays the int 0 until the first enabled component adds its output.
        self.pred = 0
        if self.use_LR:
            #bug detected. LR didn't keepdims
            self.pred = self.LR(self.ids, self.w, self.b)
        if self.use_MLR:
            print("use Mix of LR.")
            self.pred += self.MLR(self.ids, self.MLR_u, self.MLR_w)

        #only one FM will be used.
        if self.use_NFM:
            print("use NFM")
            self.pred += self.NFM(self.embedding, self.NFM_weights)
        elif self.use_BiFM:
            if self.use_SE:
                cross_term = tf.concat([
                    self.Bilinear_FM(
                        self.embedding, self.bilinear_weights, se_emb=False),
                    self.Bilinear_FM(
                        self.embeddingSE, self.bilinear_weights, se_emb=True),
                ],
                                       axis=-1)  # N,c,2k
                if self.use_FiBiNet:
                    print("use FiBiNet")  #deep backend
                    cross_term = tf.reshape(
                        cross_term, [-1, self.c * self.k * 2])  #None,2ck
                    cross_term = tf.nn.relu(
                        tf.matmul(cross_term, self.FiBiNet_weights['W1']) +
                        self.FiBiNet_weights['b1'])
                    self.pred += (
                        tf.matmul(cross_term, self.FiBiNet_weights['W2']) +
                        self.FiBiNet_weights['b2'])
                else:
                    print("use Fibifm")
                    self.pred += tf.expand_dims(tf.reduce_sum(cross_term,
                                                              axis=[1, 2]),
                                                axis=1)  # N,1

            else:
                print("use bifm")
                cross_term = self.Bilinear_FM(self.embedding,
                                              self.bilinear_weights,
                                              se_emb=False)  # N,c,k
                self.pred += tf.expand_dims(tf.reduce_sum(cross_term,
                                                          axis=[1, 2]),
                                            axis=1)  # N,1

        elif self.use_FM and not self.attention_FM and not self.use_CFM:
            print("use FM")
            if len(self.FM_ignore_interaction) == 0:
                self.pred += self.FM2(self.embedding)
            else:
                self.pred += self.FMDE(self.embedding)
        elif self.use_FM and self.attention_FM:
            print("use AFM")
            afm_out, reg = self.AFM(self.embedding, self.AFM_weights)
            self.pred += afm_out
            self.L2_reg += reg
        elif self.use_FM and self.use_CFM:
            print("use CFM")
            cfm_out, reg = self.CFM(self.embedding, self.CFM_weights)
            self.pred += cfm_out
            self.L2_reg += reg

        if self.use_AutoInt:
            self.y_deep = self.embedding
            for _l in range(self.autoint_params['autoint_layers']):
                self.y_deep = self.AutoInt(self.y_deep,
                                           self.AutoInt_weights,
                                           layer=_l)  #N,f,d
            self.pred += tf.matmul(
                tf.reshape(self.y_deep,
                           shape=[
                               -1,
                               self.fields * self.autoint_d * self.autoint_head
                           ]),
                self.AutoInt_weights['W_out']) + self.AutoInt_weights['b_out']

        if self.use_CrossNet_layers > 0:  #combine crossnet with DNN
            MLP_in = tf.reshape(self.embedding,
                                [-1, self.fields * self.k])  #(N,f*k)
            if self.dense_features_size > 0:
                MLP_in = tf.concat([MLP_in, self.dense_inputs],
                                   axis=1)  #(N,f*k+dense)
            self.MLP_out = self.MLP(MLP_in,
                                    self.weights,
                                    self.bias,
                                    return_pred=False)  #(None,last_layers)
            self.CrossNet_out = self.CrossNet(tf.expand_dims(
                MLP_in, axis=-1), self.CrossNet_weights)  #(None,f*k+d)
            self.pred += tf.keras.layers.Dense(
                1, use_bias=False,
                activation=None)(tf.concat([self.MLP_out, self.CrossNet_out],
                                           axis=1))
        elif self.use_MLP:  # DNN prediction added in parallel
            MLP_in = tf.reshape(self.embedding,
                                [-1, self.fields * self.k])  #(N,f*k)
            if self.dense_features_size > 0:
                MLP_in = tf.concat([MLP_in, self.dense_inputs],
                                   axis=1)  #(N,f*k+dense)
            self.pred += self.MLP(MLP_in, self.weights, self.bias)
        # self.pred is still the initial int only when nothing was enabled.
        assert not isinstance(self.pred, int), "must have one predicion layer"

        if self.loss_type == 'rmse':
            self.loss = tf.sqrt(tf.reduce_mean(tf.square(self.y - self.pred)))
        elif self.loss_type == 'mse':
            self.loss = tf.reduce_mean(tf.square(self.y - self.pred))
        elif self.loss_type in ['binary_crossentropy', 'binary', 'logloss']:
            self.loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(labels=self.y,
                                                        logits=self.pred))
        else:
            raise Exception("Loss type %s not supported" % self.loss_type)

        #todo EMBEDL2 coef
        self.loss += self.lambda_l2 * self.L2_reg  #+ embed_L2*1e-5
        self.optimizer = tf.train.AdamOptimizer(lr).minimize(self.loss)

        if self.metric_type is not None:
            assert self.metric_type == 'auc'
            assert self.loss_type in [
                'binary_crossentropy', 'binary', 'logloss'
            ]

        self.sess = self._init_session()
        self.sess.run(tf.global_variables_initializer())
        self.sess.run(tf.local_variables_initializer())

        def _feed(bx, bx_dense, keep_rate, by=None):
            """Assemble a feed_dict; labels and dense inputs only when used."""
            feed = {self.ids: bx, self.dropout_keeprate_holder: keep_rate}
            if by is not None:
                feed[self.y] = by
            if self.dense_features_size > 0:
                feed[self.dense_inputs] = bx_dense
            return feed

        def _restore(weights):
            """Write the saved values back into the trainable variables."""
            self.sess.run(
                tf.tuple([
                    tf.assign(var, weights[var.name])
                    for var in tf.trainable_variables()
                ]))

        def _snapshot():
            """Return the current value of every trainable variable by name."""
            return {
                v.name: v.eval(self.sess)
                for v in tf.trainable_variables()
            }

        cur_best_rounds = 0

        # A plain loss is minimized; a configured metric (AUC) is maximized.
        is_greater_better = self.metric_type is not None
        cur_min_loss = 1e8 if not is_greater_better else -1e8
        best_weights = _snapshot()

        for epoch in range(N_EPOCH):
            train_loss = 0.
            n_train_batches = 0
            y_preds_train = []
            # id input + dense input
            for bx, bx_dense, by in batcher(ids_train,
                                            y_train,
                                            X_dense=dense_train,
                                            batch_size=batch_size,
                                            hash_size=self.hash_size):
                _, batch_loss = self.sess.run(
                    [self.optimizer, self.loss],
                    feed_dict=_feed(bx, bx_dense, self.dropout_keeprate, by))
                train_loss += batch_loss
                n_train_batches += 1
                if self.metric_type:
                    # Predictions for the training AUC are taken after the
                    # update step and without dropout.
                    y_preds_train.append(
                        self.sess.run(self.pred,
                                      feed_dict=_feed(bx, bx_dense, 1.0)))
            # Average over the batches actually seen, so a data set smaller
            # than one batch no longer divides by zero.
            train_loss /= max(n_train_batches, 1)

            if self.coldStartAvg:
                if epoch == 0:
                    print("Cold Start Averaging start")
                self.coldStartAvgTool()

            #todo movielens afm rounded
            test_loss = 0.
            n_test_batches = 0
            y_preds = []
            for bx, bx_dense, by in batcher(ids_test,
                                            y_test,
                                            X_dense=dense_test,
                                            batch_size=batch_size,
                                            hash_size=self.hash_size):
                feed = _feed(bx, bx_dense, 1.0, by)
                if self.metric_type:
                    # Loss and predictions come from the same forward pass.
                    batch_loss, batch_pred = self.sess.run(
                        [self.loss, self.pred], feed_dict=feed)
                    y_preds.append(batch_pred)
                else:
                    batch_loss = self.sess.run(self.loss, feed_dict=feed)
                test_loss += batch_loss
                n_test_batches += 1

            test_loss /= max(n_test_batches, 1)

            #sklearn auc mode
            if self.metric_type:  # override the losses with the AUC
                self.y_pred_train = np.concatenate(y_preds_train, axis=0)
                self.y_pred = np.concatenate(y_preds, axis=0)
                train_loss = roc_auc_score(y_train, self.y_pred_train)
                test_loss = roc_auc_score(y_test, self.y_pred)

            metrics_ = 'loss' if self.metric_type is None else 'auc'
            print("epoch:%s train_%s:%s test_%s:%s" %
                  (epoch + 1, metrics_, train_loss, metrics_, test_loss))

            if isBetter(test_loss, cur_min_loss, is_greater_better):
                cur_min_loss = test_loss
                cur_best_rounds = epoch + 1
                best_weights = _snapshot()
            if epoch + 1 - cur_best_rounds >= early_stopping_rounds:
                print(
                    "[Early Stop]Early Stopping because not improved for %s rounds"
                    % early_stopping_rounds)
                _restore(best_weights)
                best_score = cur_min_loss
                print("[Early Stop]Best Score:", best_score, ' at round ',
                      cur_best_rounds)
                print(
                    "Train finish. Fit time:%.2f seconds. Epoch time:%.2f seconds"
                    % (time.time() - start_time,
                       (time.time() - start_time) / (epoch + 1)))
                return best_score

            #auc reset op
            self.sess.run(tf.local_variables_initializer())

        _restore(best_weights)
        best_score = cur_min_loss
        print("[Epoch Maxi]Best Score:", best_score, ' at round ',
              cur_best_rounds)
        print("Train finish. Fit time:%.2f seconds. Epoch time:%.2f seconds" %
              (time.time() - start_time, (time.time() - start_time) / N_EPOCH))
        return best_score
Example #34
0
    def __call__(self, dataset, moving_params=None):
        """Assemble the parsing graph for `dataset` and return its tensors.

        Word and tag embeddings are concatenated, passed through
        `self.n_recur` recurrent layers and optional MLP layers, then scored
        by a bilinear head classifier ('Parses') and a linear relation
        classifier ('Rels'). Variable scopes are opened in reuse mode
        whenever `moving_params` is given.

        Returns a dict with the combined probabilities, predictions,
        correctness mask, token counts, accuracy and loss, plus the
        intermediate representations and both sets of logits.
        """
        vocabs, inputs, targets = dataset.vocabs, dataset.inputs, dataset.targets
        reuse = moving_params is not None

        # Mask of the tokens whose word id is greater than the ROOT id.
        word_ids = inputs[:, :, 0]
        keep_mask = tf.to_float(tf.greater(word_ids, vocabs[0].ROOT))
        self.tokens_to_keep3D = tf.expand_dims(keep_mask, 2)
        kept_per_sentence = tf.reduce_sum(self.tokens_to_keep3D, [1, 2])
        self.sequence_lengths = tf.reshape(kept_per_sentence, [-1, 1])
        self.n_tokens = tf.reduce_sum(self.sequence_lengths)
        self.moving_params = moving_params

        word_inputs = vocabs[0].embedding_lookup(
            inputs[:, :, 0], inputs[:, :, 1],
            moving_params=self.moving_params)
        tag_inputs = vocabs[1].embedding_lookup(
            inputs[:, :, 2], moving_params=self.moving_params)

        # Recurrent stack, one variable scope per layer.
        top_recur = self.embed_concat(word_inputs, tag_inputs)
        for layer in xrange(self.n_recur):
            with tf.variable_scope('RNN%d' % layer, reuse=reuse):
                top_recur, _ = self.RNN(top_recur)

        # MLP stack: the first layer splits into three streams, the
        # remaining layers refine each stream separately.
        top_mlp = top_recur
        if self.n_mlp <= 0:
            dep_mlp = head_mlp = rel_mlp = top_mlp
        else:
            with tf.variable_scope('MLP0', reuse=reuse):
                dep_mlp, head_mlp, rel_mlp = self.MLP(top_mlp, n_splits=3)
            for layer in xrange(1, self.n_mlp):
                with tf.variable_scope('DepMLP%d' % layer, reuse=reuse):
                    dep_mlp = self.MLP(dep_mlp)
                with tf.variable_scope('HeadMLP%d' % layer, reuse=reuse):
                    head_mlp = self.MLP(head_mlp)
                with tf.variable_scope('RelMLP%d' % layer, reuse=reuse):
                    rel_mlp = self.MLP(rel_mlp)

        with tf.variable_scope('Parses', reuse=reuse):
            parse_logits = self.bilinear_classifier(
                dep_mlp, head_mlp, add_bias1=True)
            parse_output = self.output(parse_logits, targets[:, :, 1])
        with tf.variable_scope('Rels', reuse=reuse):
            rel_logits = self.linear_classifier(rel_mlp, len(vocabs[2]))
            rel_output = self.output(rel_logits, targets[:, :, 2])

        # Combine both classifiers; a token counts as correct only when
        # both its head and its relation are correct.
        probabilities = tf.tuple(
            [parse_output['probabilities'], rel_output['probabilities']])
        predictions = tf.pack(
            [parse_output['predictions'], rel_output['predictions']])
        correct = parse_output['correct'] * rel_output['correct']
        n_correct = tf.reduce_sum(correct)
        accuracy = n_correct / self.n_tokens
        loss = parse_output['loss'] + rel_output['loss']
        embed = tf.pack([word_inputs, tag_inputs])

        return {
            'probabilities': probabilities,
            'predictions': predictions,
            'correct': correct,
            'tokens': parse_output['tokens'],
            'n_correct': n_correct,
            'n_tokens': self.n_tokens,
            'accuracy': accuracy,
            'loss': loss,
            'embed': embed,
            'recur': top_recur,
            'dep': dep_mlp,
            'head': head_mlp,
            'rel': rel_mlp,
            'parse_logits': parse_logits,
            'rel_logits': rel_logits,
        }
Example #35
0
    def buildModel(self, inputShape):
        """Build the sparse-coding graph and store every node on ``self``.

        Creates the input node, the dictionary ``V1_W`` with its normalisation
        op, the activation state (``V1_A``, ``V1_Y``, ``T``) with ``old*``
        snapshot variables, the reconstruction / error / loss tensors, the
        activation and dictionary update ops, some statistics, and the
        scalar and histogram summaries.  Nothing is returned.

        Args:
            inputShape: indexable with at least three entries.  Entries 0 and
                1 must be divisible by ``self.VStrideY`` / ``self.VStrideX``
                (asserted).  Presumably (height, width, channels) -- TODO
                confirm against callers.
        """
        assert (inputShape[0] % self.VStrideY == 0)
        assert (inputShape[1] % self.VStrideX == 0)
        V_Y = int(inputShape[0] / self.VStrideY)
        V_X = int(inputShape[1] / self.VStrideX)
        self.imageShape = (self.batchSize, inputShape[0], inputShape[1],
                           inputShape[2])
        # NOTE(review): the third entry is hard-coded to 3 while the input
        # scaling below uses inputShape[2] -- confirm inputs always have
        # three channels.
        self.WShape = (self.patchSizeY, self.patchSizeX, 3, self.numV)
        self.VShape = (self.batchSize, V_Y, V_X, self.numV)

        # Place all ops on the configured device.
        with tf.device(self.device):
            with tf.name_scope("inputOps"):
                # Input node, created by the node_variable helper.
                self.inputImage = node_variable(self.imageShape, "inputImage")
                # Scale the input by 1 / sqrt(patch area * channels).
                self.scaled_inputImage = self.inputImage / np.sqrt(
                    self.patchSizeX * self.patchSizeY * inputShape[2])

            with tf.name_scope("Dictionary"):
                self.V1_W = sparse_weight_variable(self.WShape, "V1_W")

            with tf.name_scope("weightNorm"):
                # L2 norm over the first three axes, i.e. one value per entry
                # of the last axis; the epsilon guards against division by 0.
                self.normVals = tf.sqrt(
                    tf.reduce_sum(tf.square(self.V1_W),
                                  reduction_indices=[0, 1, 2],
                                  keep_dims=True))
                self.normalize_W = self.V1_W.assign(self.V1_W /
                                                    (self.normVals + 1e-8))

            with tf.name_scope("FISTA"):
                # Current state of the iterative activation update.
                self.V1_A = weight_variable(self.VShape, "V1_A", 1e-3)
                self.V1_Y = weight_variable(self.VShape, "V1_Y", 1e-3)
                # tf.Variable's second positional parameter is `trainable`,
                # so the name has to be passed by keyword to take effect.
                # (This changes the variable's graph/checkpoint name from the
                # auto-generated one to "T".)
                self.T = tf.Variable(1.0, name="T")

                # Snapshots of the state taken before each update step.
                self.oldA = weight_variable(self.VShape, "oldA", 1e-3)
                self.oldY = weight_variable(self.VShape, "oldY", 1e-3)
                self.oldT = tf.Variable(1.0, name="oldT")

                self.randV1 = tf.truncated_normal(self.VShape,
                                                  mean=0,
                                                  stddev=1e-3)
                # Ops that re-initialise the activation state.
                self.resetV1 = self.V1_A.assign(self.randV1)
                self.resetT = self.T.assign(1.0)
                self.resetY = self.V1_Y.assign(self.V1_A)

            with tf.name_scope("Recon"):
                assert (self.VStrideY >= 1)
                assert (self.VStrideX >= 1)
                # Reconstruct the image from activations and dictionary.
                self.recon = conv2d_oneToMany(self.V1_A, self.V1_W,
                                              self.imageShape, "recon",
                                              self.VStrideY, self.VStrideX)

            with tf.name_scope("Error"):
                self.error = self.scaled_inputImage - self.recon

            with tf.name_scope("Loss"):
                # Per-example sums over axes 1..3, averaged over axis 0.
                self.reconError = tf.reduce_mean(
                    tf.reduce_sum(tf.square(self.error),
                                  reduction_indices=[1, 2, 3]))
                self.l1Sparsity = tf.reduce_mean(
                    tf.reduce_sum(tf.abs(self.V1_A),
                                  reduction_indices=[1, 2, 3]))
                self.loss = self.reconError / 2 + self.thresh * self.l1Sparsity

            with tf.name_scope("Opt"):
                # Gradient of the reconstruction error w.r.t. the
                # activations, pre-multiplied by the activation step size.
                self.reconGrad = self.learningRateA * tf.gradients(
                    self.reconError, [self.V1_A])[0]

                # Snapshot the current state first, so that the new values
                # below are computed from the old ones and not from
                # variables that were already overwritten.
                self.optimizerA0 = tf.tuple([
                    self.oldA.assign(self.V1_A),
                    self.oldT.assign(self.T),
                    self.oldY.assign(self.V1_Y),
                ])

                # NOTE(review): a textbook soft-threshold takes the sign of
                # its own argument (oldY - reconGrad); here the sign comes
                # from oldA -- confirm this is intentional.
                self.newA = tf.nn.relu(
                    tf.abs(self.oldY - self.reconGrad) -
                    self.thresh * self.learningRateA) * tf.sign(self.oldA)
                # NOTE(review): the textbook FISTA step is
                # (1 + sqrt(1 + 4 * t^2)) / 2; the inner "1 +" is absent
                # here -- confirm this is intentional.
                self.newT = (1 + tf.sqrt(4 * tf.square(self.oldT))) / 2
                self.newY = self.newA + (
                    (self.oldT - 1) /
                    (self.newT + 1e-8)) * (self.newA - self.oldA)

                # Write the new state back into the live variables.
                self.optimizerA1 = self.V1_Y.assign(self.newY)
                self.optimizerA2 = self.T.assign(self.newT)
                self.optimizerA3 = self.V1_A.assign(self.newA)
                self.optimizerA = tf.tuple(
                    [self.optimizerA1, self.optimizerA2, self.optimizerA3])

                # Dictionary update: only V1_W is optimised here.
                self.optimizerW = tf.train.AdadeltaOptimizer(
                    self.learningRateW,
                    epsilon=1e-6).minimize(self.loss, var_list=[self.V1_W])

            with tf.name_scope("stats"):
                # Fraction of activations that are not exactly zero.
                self.nnz = tf.reduce_mean(
                    tf.cast(tf.not_equal(self.V1_A, 0), tf.float32))

                # Standard deviation of the error, multiplied by the same
                # factor the input was divided by in "inputOps".
                self.errorStd = tf.sqrt(
                    tf.reduce_mean(
                        tf.square(self.error - tf.reduce_mean(self.error)))
                ) * np.sqrt(self.patchSizeY * self.patchSizeX * inputShape[2])
                self.l1_mean = tf.reduce_mean(tf.abs(self.V1_A))

                # Move the last axis of the dictionary to the front.
                self.weightImages = tf.transpose(self.V1_W, [3, 0, 1, 2])

                # Log-magnitude of the activations; epsilon avoids log(0).
                self.log_V1_A = tf.log(tf.abs(self.V1_A) + 1e-15)

        # Scalar summaries.
        self.s_loss = tf.scalar_summary('loss', self.loss, name="lossSum")
        self.s_recon = tf.scalar_summary('recon error',
                                         self.reconError,
                                         name="reconError")
        self.s_errorStd = tf.scalar_summary('errorStd',
                                            self.errorStd,
                                            name="errorStd")
        self.s_l1 = tf.scalar_summary('l1 sparsity',
                                      self.l1Sparsity,
                                      name="l1Sparsity")
        self.s_l1_mean = tf.scalar_summary('l1 mean',
                                           self.l1_mean,
                                           name="l1Mean")
        self.s_s_nnz = tf.scalar_summary('nnz', self.nnz, name="nnz")

        # Histogram summaries.
        self.h_input = tf.histogram_summary('input',
                                            self.inputImage,
                                            name="input")
        self.h_recon = tf.histogram_summary('recon', self.recon, name="recon")
        self.h_v1_w = tf.histogram_summary('V1_W', self.V1_W, name="V1_W")

        self.h_v1_a = tf.histogram_summary('V1_A', self.V1_A, name="V1_A")
        self.h_log_v1_a = tf.histogram_summary('Log_V1_A',
                                               self.log_V1_A,
                                               name="Log_V1_A")

        self.h_normVals = tf.histogram_summary('normVals',
                                               self.normVals,
                                               name="normVals")
Example #36
0
    def _rcn_head(self,
                  inputs,
                  image_shape,
                  nms_threshold,
                  rpn_thresholds,
                  rcn_batch,
                  batch_size,
                  name='rcn_head',
                  **kwargs):
        """Build the RCN head: pick proposals, pool ROIs, classify, compute loss.

        Args:
            inputs: triple ``(feature_maps, rpn_reg, rpn_cls)``.
            image_shape: divisor applied to ``self.map_shape`` to obtain the
                factor handed to ``roi_pooling_layer``.
            nms_threshold: forwarded to ``non_max_suppression`` as
                ``iou_threshold``.
            rpn_thresholds: indexable; element 1 is forwarded as
                ``score_threshold``.
            rcn_batch: forwarded to ``self.create_bbox_batch`` on the
                training branch.
            batch_size: forwarded to ``non_max_suppression``.
            name: variable scope under which everything is built.
            **kwargs: must contain ``'data_format'``; all entries are also
                forwarded to ``conv_block``.

        Returns:
            ``(rcn_clsf, loss)``: the ``tf.tuple`` of arg-max class tensors
            (named ``'clsf'``) and the loss wrapped in an identity op named
            ``'loss'``.
        """
        labels_placeholder = self.anchors_placeholders['labels']
        features, box_regression, objectness = inputs
        anchor_count = self.n_anchors

        with tf.variable_scope(name):
            # Candidate indices that survive non-maximum suppression.
            nms_indices = non_max_suppression(
                box_regression,
                objectness,
                batch_size,
                anchor_count,
                iou_threshold=nms_threshold,
                score_threshold=rpn_thresholds[1],
                nonempty=True)

            # A box batch is created only in training mode; otherwise the
            # NMS result is used unchanged.
            selected_indices = tf.cond(
                self.is_training,
                lambda: self.create_bbox_batch(nms_indices, rcn_batch),
                lambda: nms_indices)

            rois, roi_labels = self._get_rois_and_labels(
                box_regression, labels_placeholder, selected_indices)

            # Register ROIs and their targets in graph collections.
            for collection_key, members in (('roi', rois),
                                            ('targets', roi_labels)):
                for member in members:
                    tf.add_to_collection(collection_key, member)
            scale = np.array(self.map_shape / image_shape)

            rois = self.stop_gradient_tuple(rois)
            roi_labels = self.stop_gradient_tuple(roi_labels)

            pooled = roi_pooling_layer(features,
                                       rois,
                                       factor=scale,
                                       shape=(7, 7),
                                       data_format=kwargs['data_format'])
            stack_indices, pooled, roi_labels = self._stack_tuple(
                pooled, roi_labels)  # pylint: disable=unbalanced-tuple-unpacking
            class_scores = conv_block(pooled,
                                      'f',
                                      units=10,
                                      name='output_conv',
                                      **kwargs)

            # The loss is computed on the raw scores, before the arg-max.
            loss = self.rcn_loss(class_scores, roi_labels)

            predicted = tf.argmax(class_scores, axis=-1)
            predicted = self._unstack_tuple(predicted, stack_indices)
            predicted = tf.tuple(predicted, name='clsf')
            for member in predicted:
                tf.add_to_collection('rcn_output', member)
            loss = tf.identity(loss, 'loss')

        return predicted, loss
Example #37
0
    def lstm_def(self, rnn_input, seq_len):
        """Build the stacked LSTM plus ops to keep or reset its state.

        The recurrent state is held in non-trainable variables so it can be
        carried from one ``session.run`` call to the next (e.g. for live
        transcription) without feeding it back through the session.

        Args:
            rnn_input: input tensor handed to ``tf.nn.dynamic_rnn``.
            seq_len: forwarded to ``dynamic_rnn`` as ``sequence_length``.

        Returns:
            A triple ``(rnn_outputs, rnn_keep_state_op, rnn_state_zero_op)``:
            the RNN outputs (transposed with ``[1, 0, 2]`` when
            ``self.time_major`` is false), an op group that stores the final
            state into the state variables, and an op group that resets those
            variables to zeros.  Only the side effects of the two op groups
            matter; their values should not be used.
        """
        with tf.variable_scope('LSTM'):
            def lstm_cell():
                # A projection layer is only added when the projection size
                # differs from the hidden size.
                num_proj = (None if self.proj_dim == self.hidden_size
                            else self.proj_dim)
                return tf.contrib.rnn.LSTMCell(
                    self.hidden_size, use_peepholes=self.use_peepholes,
                    num_proj=num_proj, forget_bias=0.0,
                    state_is_tuple=self.state_is_tuple,
                    reuse=tf.get_variable_scope().reuse)

            layers_list = []
            for _ in range(self.num_layers):
                cell = lstm_cell()
                # Output dropout is applied only when training.
                if not self.forward_only and self.keep_prob < 1.0:
                    cell = tf.contrib.rnn.DropoutWrapper(
                        cell, output_keep_prob=self.keep_prob)
                layers_list.append(cell)

            # Store the layers in a multi-layer RNN
            cell = tf.contrib.rnn.MultiRNNCell(
                layers_list, state_is_tuple=self.state_is_tuple)

        # Variables that hold the RNN state between batches.
        # NOTE(review): unpacking each layer state as (c, h) assumes
        # self.state_is_tuple is True -- confirm.
        with tf.variable_scope('Hidden_state'):
            state_variables = []
            for state_c, state_h in cell.zero_state(self.batch_size,
                                                    tf.float32):
                state_variables.append(tf.contrib.rnn.LSTMStateTuple(
                    tf.Variable(state_c, trainable=False),
                    tf.Variable(state_h, trainable=False)))
            # A tuple, so that it can be fed to dynamic_rnn as initial state.
            rnn_tuple_state = tuple(state_variables)

        # Build the RNN
        with tf.name_scope("LSTM"):
            rnn_outputs, new_states = tf.nn.dynamic_rnn(
                cell=cell,
                inputs=rnn_input,
                sequence_length=seq_len,
                initial_state=rnn_tuple_state,
                dtype=tf.float32,
                time_major=self.time_major)

        # Op group that copies the final state of this run into the state
        # variables of each layer.
        update_ops = []
        for state_variable, new_state in zip(rnn_tuple_state, new_states):
            update_ops.extend([state_variable[0].assign(new_state[0]),
                               state_variable[1].assign(new_state[1])])
        # tf.tuple combines all assignments into a single fetchable group.
        rnn_keep_state_op = tf.tuple(update_ops)

        # Op group that resets the state variables of each layer to zeros.
        update_ops = []
        for state_variable in rnn_tuple_state:
            update_ops.extend([
                state_variable[0].assign(tf.zeros_like(state_variable[0])),
                state_variable[1].assign(tf.zeros_like(state_variable[1]))])
        rnn_state_zero_op = tf.tuple(update_ops)

        if not self.time_major:
            # [time, batch_size, cell_outdim]
            rnn_outputs = tf.transpose(rnn_outputs, [1, 0, 2])

        return rnn_outputs, rnn_keep_state_op, rnn_state_zero_op
Example #38
0
    def buildModel(self, inputShape):
        """Build the sparse-coding graph and store every node on ``self``.

        Creates the input node, the dictionary ``V1_W`` with its normalisation
        op, the activation state (``V1_A``, ``V1_Y``, ``T``) with ``old*``
        snapshot variables, the reconstruction / error / loss tensors, the
        activation and dictionary update ops, some statistics, and the
        scalar and histogram summaries.  Nothing is returned.

        Args:
            inputShape: indexable with at least three entries.  Entries 0 and
                1 must be divisible by ``self.VStrideY`` / ``self.VStrideX``
                (asserted).  Presumably (height, width, channels) -- TODO
                confirm against callers.
        """
        assert(inputShape[0] % self.VStrideY == 0)
        assert(inputShape[1] % self.VStrideX == 0)
        V_Y = int(inputShape[0]/self.VStrideY)
        V_X = int(inputShape[1]/self.VStrideX)
        self.imageShape = (self.batchSize, inputShape[0], inputShape[1], inputShape[2])
        # NOTE(review): the third entry is hard-coded to 3 while the input
        # scaling below uses inputShape[2] -- confirm inputs always have
        # three channels.
        self.WShape = (self.patchSizeY, self.patchSizeX, 3, self.numV)
        self.VShape = (self.batchSize, V_Y, V_X, self.numV)

        # Place all ops on the configured device.
        with tf.device(self.device):
            with tf.name_scope("inputOps"):
                # Input node, created by the node_variable helper.
                self.inputImage = node_variable(self.imageShape, "inputImage")
                # Scale the input by 1 / sqrt(patch area * channels).
                self.scaled_inputImage = self.inputImage/np.sqrt(self.patchSizeX*self.patchSizeY*inputShape[2])

            with tf.name_scope("Dictionary"):
                self.V1_W = sparse_weight_variable(self.WShape, "V1_W")

            with tf.name_scope("weightNorm"):
                # L2 norm over the first three axes, i.e. one value per entry
                # of the last axis; the epsilon guards against division by 0.
                self.normVals = tf.sqrt(tf.reduce_sum(tf.square(self.V1_W), reduction_indices=[0, 1, 2], keep_dims=True))
                self.normalize_W = self.V1_W.assign(self.V1_W/(self.normVals + 1e-8))

            with tf.name_scope("FISTA"):
                # Current state of the iterative activation update.
                self.V1_A = weight_variable(self.VShape, "V1_A", 1e-3)
                self.V1_Y = weight_variable(self.VShape, "V1_Y", 1e-3)
                # tf.Variable's second positional parameter is `trainable`,
                # so the name has to be passed by keyword to take effect.
                # (This changes the variable's graph/checkpoint name from the
                # auto-generated one to "T".)
                self.T = tf.Variable(1.0, name="T")

                # Snapshots of the state taken before each update step.
                self.oldA = weight_variable(self.VShape, "oldA", 1e-3)
                self.oldY = weight_variable(self.VShape, "oldY", 1e-3)
                self.oldT = tf.Variable(1.0, name="oldT")

                self.randV1 = tf.truncated_normal(self.VShape, mean=0, stddev=1e-3)
                # Ops that re-initialise the activation state.
                self.resetV1 = self.V1_A.assign(self.randV1)
                self.resetT = self.T.assign(1.0)
                self.resetY = self.V1_Y.assign(self.V1_A)

            with tf.name_scope("Recon"):
                assert(self.VStrideY >= 1)
                assert(self.VStrideX >= 1)
                # Reconstruct the image from activations and dictionary.
                self.recon = conv2d_oneToMany(self.V1_A, self.V1_W, self.imageShape, "recon", self.VStrideY, self.VStrideX)

            with tf.name_scope("Error"):
                self.error = self.scaled_inputImage - self.recon

            with tf.name_scope("Loss"):
                # Per-example sums over axes 1..3, averaged over axis 0.
                self.reconError = tf.reduce_mean(tf.reduce_sum(tf.square(self.error), reduction_indices=[1, 2, 3]))
                self.l1Sparsity = tf.reduce_mean(tf.reduce_sum(tf.abs(self.V1_A), reduction_indices=[1, 2, 3]))
                self.loss = self.reconError/2 + self.thresh * self.l1Sparsity

            with tf.name_scope("Opt"):
                # Gradient of the reconstruction error w.r.t. the
                # activations, pre-multiplied by the activation step size.
                self.reconGrad = self.learningRateA * tf.gradients(self.reconError, [self.V1_A])[0]

                # Snapshot the current state first, so that the new values
                # below are computed from the old ones and not from
                # variables that were already overwritten.
                self.optimizerA0 = tf.tuple([
                    self.oldA.assign(self.V1_A),
                    self.oldT.assign(self.T),
                    self.oldY.assign(self.V1_Y),
                ])

                # NOTE(review): a textbook soft-threshold takes the sign of
                # its own argument (oldY - reconGrad); here the sign comes
                # from oldA -- confirm this is intentional.
                self.newA = tf.nn.relu(tf.abs(self.oldY - self.reconGrad) - self.thresh*self.learningRateA) * tf.sign(self.oldA)
                # NOTE(review): the textbook FISTA step is
                # (1 + sqrt(1 + 4 * t^2)) / 2; the inner "1 +" is absent
                # here -- confirm this is intentional.
                self.newT = (1+tf.sqrt(4*tf.square(self.oldT)))/2
                self.newY = self.newA + ((self.oldT-1)/(self.newT+1e-8))*(self.newA-self.oldA)

                # Write the new state back into the live variables.
                self.optimizerA1 = self.V1_Y.assign(self.newY)
                self.optimizerA2 = self.T.assign(self.newT)
                self.optimizerA3 = self.V1_A.assign(self.newA)
                self.optimizerA = tf.tuple([self.optimizerA1, self.optimizerA2, self.optimizerA3])

                # Dictionary update: only V1_W is optimised here.
                self.optimizerW = tf.train.AdadeltaOptimizer(self.learningRateW, epsilon=1e-6).minimize(self.loss,
                        var_list=[
                            self.V1_W
                        ])

            with tf.name_scope("stats"):
                # Fraction of activations that are not exactly zero.
                self.nnz = tf.reduce_mean(tf.cast(tf.not_equal(self.V1_A, 0), tf.float32))

                # Standard deviation of the error, multiplied by the same
                # factor the input was divided by in "inputOps".
                self.errorStd = tf.sqrt(tf.reduce_mean(tf.square(self.error-tf.reduce_mean(self.error))))*np.sqrt(self.patchSizeY*self.patchSizeX*inputShape[2])
                self.l1_mean = tf.reduce_mean(tf.abs(self.V1_A))

                # Move the last axis of the dictionary to the front.
                self.weightImages = tf.transpose(self.V1_W, [3, 0, 1, 2])

                # Log-magnitude of the activations; epsilon avoids log(0).
                self.log_V1_A = tf.log(tf.abs(self.V1_A)+1e-15)

        # Scalar summaries.
        self.s_loss = tf.scalar_summary('loss', self.loss, name="lossSum")
        self.s_recon = tf.scalar_summary('recon error', self.reconError, name="reconError")
        self.s_errorStd= tf.scalar_summary('errorStd', self.errorStd, name="errorStd")
        self.s_l1= tf.scalar_summary('l1 sparsity', self.l1Sparsity, name="l1Sparsity")
        self.s_l1_mean = tf.scalar_summary('l1 mean', self.l1_mean, name="l1Mean")
        self.s_s_nnz = tf.scalar_summary('nnz', self.nnz, name="nnz")

        # Histogram summaries.
        self.h_input = tf.histogram_summary('input', self.inputImage, name="input")
        self.h_recon = tf.histogram_summary('recon', self.recon, name="recon")
        self.h_v1_w = tf.histogram_summary('V1_W', self.V1_W, name="V1_W")

        self.h_v1_a = tf.histogram_summary('V1_A', self.V1_A, name="V1_A")
        self.h_log_v1_a = tf.histogram_summary('Log_V1_A', self.log_V1_A, name="Log_V1_A")

        self.h_normVals = tf.histogram_summary('normVals', self.normVals, name="normVals")
Example #39
0
    def build_input_graph(self):
        """Create the input placeholders and the cropping graph.

        Reads channel, box and image sizes from ``self.config["dataset"]``,
        creates placeholders for images, boxes, box/mask indicators and one
        int32 placeholder per entry of ``self.dset.targets``, builds the crop
        op via ``crop_graph`` and stores the results in
        ``self.input_variables`` and ``self.train_variables``.
        """
        # Identify number of channels; the full image gets one extra
        # channel when objects are masked.
        mask_objects = self.config["dataset"]["locations"]["mask_objects"]
        crop_channels = len(self.config["dataset"]["images"]["channels"])
        img_channels = crop_channels + 1 if mask_objects else crop_channels

        # Identify image and box sizes
        box_size = self.config["dataset"]["locations"]["box_size"]
        img_width = self.config["dataset"]["images"]["width"]
        img_height = self.config["dataset"]["images"]["height"]

        # Data shapes
        num_targets = len(self.dset.targets)
        crop_shape = [(box_size, box_size, crop_channels)] + [()] * num_targets
        imgs_shape = [None, img_height, img_width, img_channels]
        batch_shape = (-1, img_height, img_width, img_channels)

        # Inputs to cropping graph
        image_ph = tf.placeholder(tf.float32,
                                  shape=imgs_shape,
                                  name="raw_images")
        boxes_ph = tf.placeholder(tf.float32,
                                  shape=[None, 4],
                                  name="cell_boxes")
        box_ind_ph = tf.placeholder(tf.int32,
                                    shape=[None],
                                    name="box_indicators")
        mask_ind_ph = tf.placeholder(tf.int32,
                                     shape=[None],
                                     name="mask_indicators")
        # Keep the placeholders both by name (public dict) and in index
        # order, so that slot i + 1 of labeled_crops is always target_i
        # regardless of dict iteration order.
        targets_phs = {}
        ordered_target_phs = []
        for i in range(num_targets):
            tname = "target_" + str(i)
            target_ph = tf.placeholder(tf.int32, shape=[None], name=tname)
            targets_phs[tname] = target_ph
            ordered_target_phs.append(target_ph)

        # Outputs and cache of the cropping graph
        crop_op = crop_graph(image_ph, boxes_ph, box_ind_ph, mask_ind_ph,
                             box_size, mask_objects)
        labeled_crops = tf.tuple([crop_op] + ordered_target_phs)

        self.input_variables = {
            "image_ph": image_ph,
            "boxes_ph": boxes_ph,
            "box_ind_ph": box_ind_ph,
            "targets_phs": targets_phs,
            "mask_ind_ph": mask_ind_ph,
            "labeled_crops": labeled_crops,
            "shapes": {
                "crops": crop_shape,
                "images": imgs_shape,
                "batch": batch_shape
            },
        }

        # Training variables: the crops plus a one-hot encoding of the
        # first target, with depth taken from that target's shape[1].
        self.train_variables = {
            "image_batch":
            self.input_variables["labeled_crops"][0],
            "target_0":
            tf.one_hot(self.input_variables["labeled_crops"][1],
                       self.dset.targets[0].shape[1])
        }
Example #40
0
def natural_to_standard(eta1, eta2, name='gauss_to_stndrd'):
    """Convert Gaussian natural parameters to standard parameters.

    Computes ``sigma = inverse(-2 * eta2)`` and ``mu = sigma . eta1`` as a
    batched matrix-vector product.

    Args:
        eta1: tensor that is expanded at axis 2 before the matmul, i.e. a
            batch of vectors.
        eta2: tensor of square matrices compatible with ``eta1``.
        name: name scope for the created ops.

    Returns:
        The ``tf.tuple`` ``(mu, sigma)`` named ``'stndrd_params'``.
    """
    with tf.name_scope(name):
        sigma = tf.matrix_inverse(-2 * eta2)
        # Treat eta1 as column vectors for the batched matmul ...
        mu = tf.matmul(sigma, tf.expand_dims(eta1, axis=2))
        # ... then drop the trailing unit axis again.  Squeezing (instead of
        # reshaping to eta1's static shape) also works when eta1 has
        # dimensions that are unknown at graph-construction time.
        mu = tf.squeeze(mu, axis=2)
        return tf.tuple((mu, sigma), name='stndrd_params')
Example #41
0
    def create_model_multigpu(self):
        """Build one model replica per GPU and the shared optimisation ops.

        For every GPU a replica is created with ``self.create_model`` and its
        gradients are computed (restricted to the npad variables for the
        ``'v1'`` and ``'static_seq'`` npad modes).  The averaged loss,
        perplexity, saver and -- when training -- the train op are stored on
        ``self``.
        """
        def _npad_scope():
            # Variable scope the optimisation is restricted to, or None
            # when all trainable variables are used.
            mode = self.model_config.npad_mode
            if mode == 'v1':
                return 'model/transformer_decoder/decoder/layer_5/npad/'
            if mode == 'static_seq':
                return 'model/transformer_decoder/npad/'
            return None

        tower_losses = []
        tower_grads = []
        ops = [tf.constant(0)]
        self.objs = []
        self.global_step = tf.train.get_or_create_global_step()
        optim = self.get_optim()

        if self.model_config.fetch_mode == 'tf_example_dataset':
            fetch_data = self.data.get_data_sample()
        else:
            fetch_data = None

        with tf.variable_scope(tf.get_variable_scope()):
            for gpu_id in range(self.model_config.num_gpus):
                with tf.device('/device:GPU:%d' % gpu_id):
                    with tf.name_scope('%s_%d' % ('gpu_scope', gpu_id)):
                        loss, obj = self.create_model(fetch_data=fetch_data)

                        grad_kwargs = {'colocate_gradients_with_ops': True}
                        scope_filter = _npad_scope()
                        if scope_filter is not None:
                            grad_kwargs['var_list'] = tf.get_collection(
                                tf.GraphKeys.TRAINABLE_VARIABLES,
                                scope=scope_filter)
                        grad = optim.compute_gradients(loss, **grad_kwargs)

                        # Later replicas share the variables of the first.
                        tf.get_variable_scope().reuse_variables()
                        tower_losses.append(loss)
                        tower_grads.append(grad)
                        if 'rule' in self.model_config.memory and self.is_train:
                            for key in ('mem_contexts', 'mem_outputs',
                                        'mem_counter'):
                                ops.append(obj[key])
                        self.objs.append(obj)

        with tf.variable_scope('optimization'):
            self.loss = tf.divide(tf.add_n(tower_losses),
                                  self.model_config.num_gpus)
            self.perplexity = tf.exp(tf.reduce_mean(self.loss))

            if self.is_train:
                avg_grad = self.average_gradients(tower_grads)
                averaged = [gradient for gradient, _ in avg_grad]
                clipped_grads, _ = tf.clip_by_global_norm(
                    averaged, self.model_config.max_grad_norm)
                # NOTE(review): the variables are looked up again from the
                # collection rather than taken from avg_grad; this relies on
                # both being in the same order -- confirm.
                train_vars = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, scope=_npad_scope())
                self.train_op = optim.apply_gradients(
                    zip(clipped_grads, train_vars),
                    global_step=self.global_step)
                self.increment_global_step = tf.assign_add(self.global_step, 1)

            self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)
            self.ops = tf.tuple(ops)
Example #42
0
    def __call__(self, dataset, moving_params=None):
        """Build the parser graph for ``dataset`` and return its output tensors.

        Args:
            dataset: object exposing ``vocabs``, ``inputs`` and ``targets``.
            moving_params: when not ``None``, variables are reused and the
                relation classifier is conditioned on the predicted heads
                instead of the gold heads from ``targets``.

        Returns:
            A dict with the keys ``probabilities``, ``predictions``,
            ``correct``, ``tokens``, ``n_correct``, ``n_tokens``,
            ``accuracy``, ``loss``, ``embed``, ``recur``, ``parse_mlp``,
            ``rel_mlp``, ``parse_logits`` and ``rel_logits``.
        """
        vocabs, inputs, targets = (dataset.vocabs, dataset.inputs,
                                   dataset.targets)
        reuse = moving_params is not None

        # Mask of positions whose first input feature is greater than ROOT.
        keep_mask = tf.greater(inputs[:, :, 0], vocabs[0].ROOT)
        self.tokens_to_keep3D = tf.expand_dims(tf.to_float(keep_mask), 2)
        per_sentence = tf.reduce_sum(self.tokens_to_keep3D, [1, 2])
        self.sequence_lengths = tf.reshape(per_sentence, [-1, 1])
        self.n_tokens = tf.reduce_sum(self.sequence_lengths)
        self.moving_params = moving_params

        # Embeddings.
        word_inputs, pret_inputs = vocabs[0].embedding_lookup(
            inputs[:, :, 0], inputs[:, :, 1], moving_params=self.moving_params)
        tag_inputs = vocabs[1].embedding_lookup(
            inputs[:, :, 2], moving_params=self.moving_params)

        # Recurrent stack.
        top_recur = self.embed_concat(word_inputs + pret_inputs, tag_inputs)
        for layer in xrange(self.n_recur):
            with tf.variable_scope('RNN%d' % layer, reuse=reuse):
                top_recur, _ = self.RNN(top_recur)

        with tf.variable_scope('MLP0', reuse=reuse):
            parse_mlp, rel_mlp = self.double_MLP(top_recur, n_splits=2)

        with tf.variable_scope('Parses', reuse=reuse):
            parse_logits = tf.squeeze(self.linear_classifier(parse_mlp, 1))
            parse_output = self.output(parse_logits, targets[:, :, 1])
            # Gold heads without moving params, predicted heads otherwise.
            predictions = (parse_output['predictions'] if reuse
                           else targets[:, :, 1])
        with tf.variable_scope('Rels', reuse=reuse):
            rel_logits, rel_logits_cond = self.conditional_linear_classifier(
                rel_mlp, len(vocabs[2]), predictions)
            rel_output = self.output(rel_logits, targets[:, :, 2])
            rel_output['probabilities'] = self.conditional_probabilities(
                rel_logits_cond, transpose=False)

        # Combine the head and relation results.
        probabilities = tf.tuple(
            [parse_output['probabilities'], rel_output['probabilities']])
        packed_predictions = tf.pack(
            [parse_output['predictions'], rel_output['predictions']])
        correct = parse_output['correct'] * rel_output['correct']
        n_correct = tf.reduce_sum(correct)
        n_tokens = self.n_tokens
        accuracy = n_correct / n_tokens
        loss = parse_output['loss'] + rel_output['loss']
        embed = tf.pack([word_inputs, tag_inputs])

        return {
            'probabilities': probabilities,
            'predictions': packed_predictions,
            'correct': correct,
            'tokens': parse_output['tokens'],
            'n_correct': n_correct,
            'n_tokens': n_tokens,
            'accuracy': accuracy,
            'loss': loss,
            'embed': embed,
            'recur': top_recur,
            'parse_mlp': parse_mlp,
            'rel_mlp': rel_mlp,
            'parse_logits': parse_logits,
            'rel_logits': rel_logits,
        }
Example #43
0
 def train_loop_body(step):
   """Run one optimizer step and return the incremented step counter.

   In eager mode the loss function itself is handed to the optimizer;
   otherwise it is called first and its result is minimized.  The returned
   single-element tuple depends on the train op via ``control_inputs``.
   """
   if tf.executing_eagerly():
     loss = build_loss_fn
   else:
     loss = build_loss_fn()
   train_op = optimizer.minimize(loss)
   next_step = tf.add(step, 1)
   return tf.tuple([next_step], control_inputs=[train_op])
Example #44
0
 def data_output(self):
     """Return ``tf.tuple`` of the generated image and the reproduced sound."""
     outputs = [self.generated_img, self.reproduced_sound]
     return tf.tuple(tensors=outputs)
Example #45
0
  def training_graph(self, input_data, input_labels, random_seed):
    """Constructs a TF graph for training a random tree.

    The graph is built in stages that are chained together with control
    dependencies: accumulate statistics, sample candidate splits, find
    finished nodes, pick best splits, grow the tree, update the fertile
    slots, and finally reset the accumulators that were cleared/allocated.

    Args:
      input_data: A tensor or placeholder for input data.
      input_labels: A tensor or placeholder for labels associated with
        input_data.
      random_seed: The random number generator seed to use for this tree.  0
        means use the current time as the seed.

    Returns:
      The last op in the random tree training graph: a `tf.group` over all
      of the variable update ops collected in this method.
    """
    # NOTE(review): tf.concat(0, values), tf.neg and squeeze_dims= used below
    # look like the pre-1.0 TensorFlow spellings -- confirm the targeted
    # TensorFlow version before touching them.

    # Count extremely random stats.
    # The custom op yields six outputs: a delta for the per-class node
    # weights, indices/deltas for the candidate-split and total-split weights,
    # and input_leaves, which is passed on to sample_inputs below.
    (pcw_node_delta, pcw_splits_indices, pcw_splits_delta, pcw_totals_indices,
     pcw_totals_delta, input_leaves) = (
         self.training_ops.count_extremely_random_stats(
             input_data, input_labels, self.variables.tree,
             self.variables.tree_thresholds,
             self.variables.node_to_accumulator_map,
             self.variables.candidate_split_features,
             self.variables.candidate_split_thresholds,
             num_classes=self.params.num_classes))
    # Fold the deltas into the corresponding weight variables.
    node_update_op = tf.assign_add(self.variables.node_per_class_weights,
                                   pcw_node_delta)
    candidate_update_op = self.training_ops.scatter_add_ndim(
        self.variables.candidate_split_per_class_weights,
        pcw_splits_indices, pcw_splits_delta)

    totals_update_op = self.training_ops.scatter_add_ndim(
        self.variables.total_split_per_class_weights, pcw_totals_indices,
        pcw_totals_delta)

    # Sample inputs.
    # The sampled features/thresholds are written into the candidate split
    # variables at update_indices.
    update_indices, feature_updates, threshold_updates = (
        self.training_ops.sample_inputs(
            input_data, self.variables.node_to_accumulator_map,
            input_leaves, self.variables.candidate_split_features,
            self.variables.candidate_split_thresholds,
            split_initializations_per_input=(
                self.params.split_initializations_per_input),
            split_sampling_random_seed=random_seed))
    update_features_op = tf.scatter_update(
        self.variables.candidate_split_features, update_indices,
        feature_updates)
    update_thresholds_op = tf.scatter_update(
        self.variables.candidate_split_thresholds, update_indices,
        threshold_updates)

    # Calculate finished nodes.
    # These ops are gated on totals_update_op. Column 0 of the tree variable
    # is sliced out and compared against LEAF_NODE to find the leaf rows.
    with tf.control_dependencies([totals_update_op]):
      children = tf.squeeze(tf.slice(self.variables.tree, [0, 0], [-1, 1]),
                            squeeze_dims=[1])
      is_leaf = tf.equal(LEAF_NODE, children)
      leaves = tf.to_int32(tf.squeeze(tf.where(is_leaf), squeeze_dims=[1]))
      finished = self.training_ops.finished_nodes(
          leaves, self.variables.node_to_accumulator_map,
          self.variables.total_split_per_class_weights,
          num_split_after_samples=self.params.split_after_samples)

    # Update leaf scores.
    # TODO(gilberth): Optimize this. It currently calculates counts for
    # every non-fertile leaf.
    with tf.control_dependencies([node_update_op]):
      # f1: leave the scores untouched.
      def f1():
        return self.variables.non_fertile_leaf_scores
      # f2: recompute the scores from the per-class weights of the
      # non-fertile leaves and store them back into the scores variable.
      def f2():
        counts = tf.gather(self.variables.node_per_class_weights,
                           self.variables.non_fertile_leaves)
        new_scores = self._weighted_gini(counts)
        return tf.assign(self.variables.non_fertile_leaf_scores, new_scores)

      # Because we can't have variables of size 0, a garbage value of -1 is
      # stored in non_fertile_leaves instead.  Here we check for that so we
      # don't try to index into node_per_class_weights in a tf.gather with a
      # negative number.
      update_nonfertile_leaves_scores_op = tf.cond(tf.less(
          self.variables.non_fertile_leaves[0], 0), f1, f2)

    # Calculate best splits.
    # Gated on both the candidate and the total weight updates.
    with tf.control_dependencies([candidate_update_op, totals_update_op]):
      split_indices = self.training_ops.best_splits(
          finished, self.variables.node_to_accumulator_map,
          self.variables.candidate_split_per_class_weights,
          self.variables.total_split_per_class_weights)

    # Grow tree.
    # Gated on the candidate feature/threshold scatter updates from the
    # sampling stage above.
    with tf.control_dependencies([update_features_op, update_thresholds_op]):
      (tree_update_indices, tree_children_updates,
       tree_threshold_updates, tree_depth_updates, new_eot) = (
           self.training_ops.grow_tree(
               self.variables.end_of_tree, self.variables.tree_depths,
               self.variables.node_to_accumulator_map, finished, split_indices,
               self.variables.candidate_split_features,
               self.variables.candidate_split_thresholds))
      tree_update_op = tf.scatter_update(
          self.variables.tree, tree_update_indices, tree_children_updates)
      # NOTE: "threhsolds" is a misspelling of "thresholds"; the name is used
      # again when the op is appended to `updates` below.
      threhsolds_update_op = tf.scatter_update(
          self.variables.tree_thresholds, tree_update_indices,
          tree_threshold_updates)
      depth_update_op = tf.scatter_update(
          self.variables.tree_depths, tree_update_indices, tree_depth_updates)

    # Update fertile slots.
    # depth_update_op is never appended to `updates`; this control dependency
    # is the only place it is referenced.
    with tf.control_dependencies([update_nonfertile_leaves_scores_op,
                                  depth_update_op]):
      (node_map_updates, accumulators_cleared, accumulators_allocated,
       new_nonfertile_leaves, new_nonfertile_leaves_scores) = (
           self.training_ops.update_fertile_slots(
               finished, self.variables.non_fertile_leaves,
               self.variables.non_fertile_leaf_scores,
               self.variables.end_of_tree, self.variables.tree_depths,
               self.variables.candidate_split_per_class_weights,
               self.variables.total_split_per_class_weights,
               self.variables.node_to_accumulator_map,
               max_depth=self.params.max_depth))

    # Ensure end_of_tree doesn't get updated until UpdateFertileSlots has
    # used it to calculate new leaves.
    gated_new_eot, = tf.tuple([new_eot], control_inputs=[new_nonfertile_leaves])
    eot_update_op = tf.assign(self.variables.end_of_tree, gated_new_eot)

    # Collect every op that must run for one training step.
    updates = []
    updates.append(eot_update_op)
    updates.append(tree_update_op)
    updates.append(threhsolds_update_op)
    # validate_shape=False lets the assigned value differ in shape from the
    # variable's current shape.
    updates.append(tf.assign(
        self.variables.non_fertile_leaves, new_nonfertile_leaves,
        validate_shape=False))
    updates.append(tf.assign(
        self.variables.non_fertile_leaf_scores,
        new_nonfertile_leaves_scores, validate_shape=False))

    # Row 0 of node_map_updates supplies the scatter indices and row 1 the
    # values written into node_to_accumulator_map.
    updates.append(tf.scatter_update(
        self.variables.node_to_accumulator_map,
        tf.squeeze(tf.slice(node_map_updates, [0, 0], [1, -1]),
                   squeeze_dims=[0]),
        tf.squeeze(tf.slice(node_map_updates, [1, 0], [1, -1]),
                   squeeze_dims=[0])))

    cleared_and_allocated_accumulators = tf.concat(
        0, [accumulators_cleared, accumulators_allocated])
    # Calculate values to put into scatter update for candidate counts.
    # Candidate split counts are always reset back to 0 for both cleared
    # and allocated accumulators. This means some accumulators might be doubly
    # reset to 0 if they were released and not allocated, then later allocated.
    candidate_pcw_values = tf.tile(
        tf.expand_dims(tf.expand_dims(
            tf.zeros_like(cleared_and_allocated_accumulators, dtype=tf.float32),
            1), 2),
        [1, self.params.num_splits_to_consider, self.params.num_classes])
    updates.append(tf.scatter_update(
        self.variables.candidate_split_per_class_weights,
        cleared_and_allocated_accumulators, candidate_pcw_values))

    # Calculate values to put into scatter update for total counts.
    # Cleared accumulators get rows of -1 and allocated accumulators get rows
    # of 0, concatenated in the same order as the index tensor above.
    total_cleared = tf.tile(
        tf.expand_dims(
            tf.neg(tf.ones_like(accumulators_cleared, dtype=tf.float32)), 1),
        [1, self.params.num_classes])
    total_reset = tf.tile(
        tf.expand_dims(
            tf.zeros_like(accumulators_allocated, dtype=tf.float32), 1),
        [1, self.params.num_classes])
    total_pcw_updates = tf.concat(0, [total_cleared, total_reset])
    updates.append(tf.scatter_update(
        self.variables.total_split_per_class_weights,
        cleared_and_allocated_accumulators, total_pcw_updates))

    # Calculate values to put into scatter update for candidate splits.
    # Every candidate feature slot of a cleared or allocated accumulator is
    # overwritten with -1.
    split_features_updates = tf.tile(
        tf.expand_dims(
            tf.neg(tf.ones_like(cleared_and_allocated_accumulators)), 1),
        [1, self.params.num_splits_to_consider])
    updates.append(tf.scatter_update(
        self.variables.candidate_split_features,
        cleared_and_allocated_accumulators, split_features_updates))

    return tf.group(*updates)
Example #46
0
    def _build_base_rnn(self, inputs, input_seq_lengths, forward_only=True):
        """
        Assemble the acoustic model graph: input projection, stacked LSTM with persistent state,
        output projection and CTC beam-search decoding.

        Parameters
        ----------
        :param inputs: inputs to the RNN
        :param input_seq_lengths: vector containing the length of each input from 'inputs'
        :param forward_only: when False, dropout placeholders are created and every LSTM cell is wrapped
                             in a DropoutWrapper; when True (the default) no dropout is added

        Returns
        ----------
        :returns global_step: a non-trainable variable, initialised to 0, tracking the learning process step
        :returns logits: each char probability for each timestep of the input, for each item of the batch
        :returns prediction: the best prediction for the input
        :returns rnn_keep_state_op: result of tf.tuple over the assigns that save the RNN internal state
                                    for the next batch (its value should not be used)
        :returns rnn_state_zero_op: result of tf.tuple over the assigns that reset the RNN internal state
                                    to zeros (its value should not be used)
        :returns input_keep_prob_ph: a placeholder for input_keep_prob of the dropout layer
                                     (None if forward_only is True)
        :returns output_keep_prob_ph: a placeholder for output_keep_prob of the dropout layer
                                      (None if forward_only is True)
        :returns rnn_tuple_state: the RNN internal state, one LSTMStateTuple of variables per layer
        """
        # Counter for the learning process step
        global_step = tf.Variable(0, trainable=False, name='global_step')

        with_dropout = not forward_only

        # The keep-probability placeholders only exist when dropout is requested; they can be
        # fed with different values when running on the test set
        if with_dropout:
            with tf.name_scope('dropout'):
                input_keep_prob_ph = tf.placeholder(tf.float32)
                output_keep_prob_ph = tf.placeholder(tf.float32)
        else:
            input_keep_prob_ph = None
            output_keep_prob_ph = None

        def make_layer():
            # One LSTM layer, wrapped with dropout when it has been requested
            layer = tf.contrib.rnn.BasicLSTMCell(self.hidden_size, state_is_tuple=True)
            if with_dropout:
                with tf.name_scope('dropout'):
                    layer = tf.contrib.rnn.DropoutWrapper(layer, input_keep_prob=input_keep_prob_ph,
                                                          output_keep_prob=output_keep_prob_ph)
            return layer

        # Stack the layers of the acoustic model into a single multi-layer cell
        with tf.variable_scope('LSTM'):
            stacked_layers = [make_layer() for _ in range(self.num_layers)]
            cell = tf.contrib.rnn.MultiRNNCell(stacked_layers, state_is_tuple=True)

        # Parameters of the dense layer sitting between the raw input and the RNN
        with tf.variable_scope('Input_Layer'):
            input_weights = tf.get_variable("input_w", [self.input_dim, self.hidden_size], tf.float32,
                                            initializer=tf.contrib.layers.xavier_initializer())
            input_bias = tf.get_variable("input_b", [self.hidden_size], tf.float32,
                                         initializer=tf.constant_initializer(0.0))

        # Project every slice along axis 0 through the input layer, then rebuild a single tensor
        projected_steps = []
        for step_slice in tf.split(axis=0, num_or_size_splits=self.max_input_seq_length, value=inputs):
            step_input = tf.squeeze(step_slice, axis=[0])
            projected_steps.append(tf.matmul(step_input, input_weights) + input_bias)
        rnn_inputs = tf.stack(projected_steps)

        # Optional batch normalization of the RNN inputs
        if self.normalization:
            with tf.name_scope('Normalization'):
                # The tensor is [time, batch_size, input vector], so the moments are taken over dim 1
                batch_mean, batch_var = tf.nn.moments(rnn_inputs, [1], shift=None, name="moments", keep_dims=True)
                rnn_inputs = tf.nn.batch_normalization(rnn_inputs, batch_mean, batch_var,
                                                       offset=None, scale=None,
                                                       variance_epsilon=1e-3, name="batch_norm")

        # Keep the RNN state in variables so that it survives between batches (useful for live
        # transcript); this avoids fetching the state from the session and feeding it back every time
        with tf.variable_scope('Hidden_state'):
            per_layer_state = [
                tf.nn.rnn_cell.LSTMStateTuple(tf.Variable(zero_c, trainable=False),
                                              tf.Variable(zero_h, trainable=False))
                for zero_c, zero_h in cell.zero_state(self.batch_size, tf.float32)
            ]
            # dynamic_rnn is given the initial state as a tuple
            rnn_tuple_state = tuple(per_layer_state)

        # Unroll the RNN over the time-major inputs
        with tf.name_scope('LSTM'):
            rnn_output, new_states = tf.nn.dynamic_rnn(cell, rnn_inputs, sequence_length=input_seq_lengths,
                                                       initial_state=rnn_tuple_state, time_major=True)

        # Ops copying the freshly computed state into the state variables, layer by layer.
        # tf.tuple is only used to bundle them; the tuple's actual value should not be used.
        keep_assigns = [stored[part].assign(fresh[part])
                        for stored, fresh in zip(rnn_tuple_state, new_states)
                        for part in (0, 1)]
        rnn_keep_state_op = tf.tuple(keep_assigns)

        # Ops overwriting every state variable with zeros, bundled the same way
        zero_assigns = [stored[part].assign(tf.zeros_like(stored[part]))
                        for stored in rnn_tuple_state
                        for part in (0, 1)]
        rnn_state_zero_op = tf.tuple(zero_assigns)

        # Parameters of the dense layer mapping the RNN output onto the char_map labels
        with tf.variable_scope('Output_layer'):
            output_weights = tf.get_variable("output_w", [self.hidden_size, self.num_labels], tf.float32,
                                             initializer=tf.contrib.layers.xavier_initializer())
            output_bias = tf.get_variable("output_b", [self.num_labels], tf.float32,
                                          initializer=tf.constant_initializer(0.0))

        # Logits: each char probability for each timestep of the input, for each item of the batch
        step_logits = []
        for step_slice in tf.split(axis=0, num_or_size_splits=self.max_input_seq_length, value=rnn_output):
            step_output = tf.squeeze(step_slice, axis=[0])
            step_logits.append(tf.matmul(step_output, output_weights) + output_bias)
        logits = tf.stack(step_logits)

        # The prediction is the best "path" of probabilities found by the beam search decoder
        decoded, _log_prob = tf.nn.ctc_beam_search_decoder(logits, input_seq_lengths)
        prediction = tf.to_int32(decoded[0])

        return (global_step, logits, prediction, rnn_keep_state_op, rnn_state_zero_op,
                input_keep_prob_ph, output_keep_prob_ph, rnn_tuple_state)