def regression_approach():
    # None allows any batch size; 784 is the flattened 28x28 image, and there are 10 digit classes
    x = tf.placeholder("float", shape=[None, 784], name="x_input")
    y_ = tf.placeholder("float", shape=[None, 10], name="y_input")

    # init of weights and bias
    W = tf.Variable(tf.zeros([784,10]), name="weights")
    b = tf.Variable(tf.zeros([10]), name="bias")

    # init variables in a session
    # sess.run(tf.initialize_all_variables())

    # predictions
    with tf.name_scope("prediction") as scope:
        y = tf.nn.softmax(tf.matmul(x, W) + b)

    w_hist = tf.histogram_summary("weights", W)
    b_hist = tf.histogram_summary("biases", b)
    y_hist = tf.histogram_summary("y", y)

    # cost func set to be cross entropy
    with tf.name_scope("cost_func") as scope:
        cross_entropy = -tf.reduce_sum(y_ * tf.log(y))
        ce_sum = tf.scalar_summary("cross_entropy", cross_entropy)

    # training: once the graph is defined, minimize cross-entropy with plain (steepest) gradient descent
    with tf.name_scope("train") as scope:
        train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)

    # evaluate model
    with tf.name_scope("test") as scope:
        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        accuracy_summary = tf.scalar_summary("accuracy", accuracy)

    # merge all the summaries
    merged = tf.merge_all_summaries()

    # Launch the graph
    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())

        writer = tf.train.SummaryWriter('/home/vionlabs/Documents/vionlabs_weilun/machine_learning/tensorflow_testing/graph', graph_def=sess.graph_def)
        # tf.initialize_all_variables().run()

        for i in range(1000):
            if i % 10 == 0:  # Record summary data and the accuracy
                feed = {x: mnist.test.images, y_: mnist.test.labels}
                result = sess.run([merged, accuracy], feed_dict=feed)
                summary_str = result[0]
                acc = result[1]
                writer.add_summary(summary_str, i)
                print("Accuracy at step %s: %s" % (i, acc))
            else:
                batch_xs, batch_ys = mnist.train.next_batch(100)
                feed = {x: batch_xs, y_: batch_ys}
                sess.run(train_step, feed_dict=feed)

        print(accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
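A minimal driver sketch for the function above. It assumes the TF 0.x-era MNIST tutorial helper is available and that the call sits in the same module, since regression_approach() reads the `mnist` name as a global.

from tensorflow.examples.tutorials.mnist import input_data

# one_hot=True yields 10-dimensional label vectors, matching the y_ placeholder
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
regression_approach()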
def _activation_summary(x):
    '''
    Create summaries for visualization (activation histogram and sparsity).
    '''
    tensor_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', x.op.name)
    tf.histogram_summary(tensor_name + '/activations', x)
    tf.scalar_summary(tensor_name + '/sparsity', tf.nn.zero_fraction(x))
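A small, hedged example of how such a helper is usually called; TOWER_NAME and the relu tensor below are illustrative stand-ins, not part of the original module.

import re
import tensorflow as tf

TOWER_NAME = 'tower'   # assumed prefix stripped from op names in the summaries

with tf.name_scope('tower_0'):
    conv1 = tf.nn.relu(tf.random_normal([8, 4]), name='conv1')
_activation_summary(conv1)   # adds conv1/activations and conv1/sparsity summaries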
def training(loss, learning_rate, loss_key=None):
  """Sets up the training Ops.

  Creates a summarizer to track the loss over time in TensorBoard.

  Creates an optimizer and applies the gradients to all trainable variables.

  The Op returned by this function is what must be passed to the
  `sess.run()` call to cause the model to train.

  Args:
    loss: Loss tensor, from loss().
    learning_rate: The learning rate to use for gradient descent.
    loss_key: int giving stage of pretraining so we can store
                loss summaries for each pretraining stage

  Returns:
    train_op: The Op for training.
    global_step: The global step Variable, incremented once per training step.
  """
  if loss_key is not None:
    # Add a scalar summary for the snapshot loss.
    loss_summaries[loss_key] = tf.scalar_summary(loss.op.name, loss)
  else:
    tf.scalar_summary(loss.op.name, loss)
    for var in tf.trainable_variables():
      tf.histogram_summary(var.op.name, var)
  # Create the gradient descent optimizer with the given learning rate.
  optimizer = tf.train.GradientDescentOptimizer(learning_rate)
  # Create a variable to track the global step.
  global_step = tf.Variable(0, name='global_step', trainable=False)
  # Use the optimizer to apply the gradients that minimize the loss
  # (and also increment the global step counter) as a single training step.
  train_op = optimizer.minimize(loss, global_step=global_step)
  return train_op, global_step
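A short usage sketch, under assumptions flagged in the comments: a toy scalar loss with one trainable variable stands in for the loss() output the docstring refers to, and loss_key is left at None so the module-level loss_summaries dict is not needed.

# Hypothetical toy loss so the call is concrete; the real loss comes from loss().
x = tf.placeholder(tf.float32, [None], name='x')
w = tf.Variable(1.0, name='w')
toy_loss = tf.reduce_mean(tf.square(w * x - 2.0))

train_op, global_step = training(toy_loss, learning_rate=0.01)

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    # each run performs one gradient-descent update and increments global_step
    sess.run(train_op, feed_dict={x: [1.0, 2.0, 3.0]})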
Example #4
def produce_embeddings(source):
    """ Produce the embbedings from the one-hot vectors 
    
    Args:
        source: 4D tensor, shape=(BATCH_SIZE, 1, S_ENGLISH, T_ENGLISH)
    
    Returns:
        4D tensor, shape=(BATCH_SIZE, 1, S_ENGLISH, EMBEDDINGS_DIMENSION)
    """
    
    with tf.variable_scope('Embeddings'):
        weights = tf.get_variable(name='weights', 
                                  shape=[1,1,T_ENGLISH,EMBEDDINGS_DIMENSION], 
                                  initializer=tf.random_normal_initializer(stddev=1.0/math.sqrt(float(T_ENGLISH)))
                                  )
        
        weights_hist = tf.histogram_summary("weights-encode", weights)
        
        biases = tf.get_variable(name='biases',
                                 shape=[EMBEDDINGS_DIMENSION],          
                                 initializer=tf.constant_initializer(0.0))
                                 
        biases_hist = tf.histogram_summary("biases-encode", biases)
        
        embeddings = tf.nn.tanh(biases + tf.nn.conv2d(source, filter=weights, strides=[1,1,1,1], padding='VALID'))
        
        return embeddings                
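For context, a rough call with made-up shape constants; BATCH_SIZE, S_ENGLISH, T_ENGLISH and EMBEDDINGS_DIMENSION are whatever the surrounding project defines, and the values below are placeholders only.

import math
import tensorflow as tf

# Placeholder values, chosen only to make the shapes concrete.
BATCH_SIZE, S_ENGLISH, T_ENGLISH, EMBEDDINGS_DIMENSION = 4, 20, 30, 16

one_hot_source = tf.placeholder(tf.float32, [BATCH_SIZE, 1, S_ENGLISH, T_ENGLISH])
embedded = produce_embeddings(one_hot_source)   # shape (BATCH_SIZE, 1, S_ENGLISH, EMBEDDINGS_DIMENSION)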
Example #5
    def nn_conv_layer(input_tensor, patch_size, num_channels,output_depth, layer_name, biases=False,act=None, pool=None):
        """Reusable code for making a simple neural net layer.

    """
        # Adding a name scope ensures logical grouping of the layers in the graph.
        with tf.name_scope(layer_name):
            # This Variable will hold the state of the weights for the layer
            with tf.name_scope('weights'):
                weights = weight_variable([patch_size,patch_size,num_channels,output_depth])
                # print ("weights:%s"%(weights.get_shape()))
                variable_summaries(weights, layer_name + '/weights')
            if (biases==True):
                with tf.name_scope('biases'):
                    biases = bias_variable([output_depth])
                    # print("biases:%s" % (biases.get_shape()))
                    variable_summaries(biases, layer_name + '/biases')
            with tf.name_scope('conv2d'):
                # print("input:%s" % (input_tensor.get_shape()))
                preactivate = tf.nn.conv2d(input_tensor, weights, [1, 1, 1, 1], padding='SAME')
                tf.histogram_summary(layer_name + '/pre_activations', preactivate)
                print("preactivate:%s" % (preactivate.get_shape()))
            if pool is not None:
                max_pool = pool(preactivate, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                                padding='SAME', name='max_pool')
            else:
                max_pool = preactivate  # no pooling requested; fall through to the raw pre-activation
            if act is not None:
                activations = act(max_pool + biases, 'activation')
                # tf.histogram_summary(layer_name + '/activations', activations)

            return preactivate
    def train(self, total_loss):
        loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
        losses = tf.get_collection('losses')
        loss_averages_op = loss_averages.apply(losses + [total_loss])

        for l in losses + [total_loss]:
            tf.scalar_summary(l.op.name + ' (raw)', l)

        # Apply gradients, and add histograms
        with tf.control_dependencies([loss_averages_op]):
            opt = tf.train.AdamOptimizer()
            grads = opt.compute_gradients(total_loss)
        apply_gradient_op = opt.apply_gradients(grads)
        for var in tf.trainable_variables():
            tf.histogram_summary(var.op.name, var)
        for grad, var in grads:
            if grad is not None:
                tf.histogram_summary(var.op.name + '/gradients', grad)

        # Track the moving averages of all trainable variables
        variable_averages = tf.train.ExponentialMovingAverage(Recognizer.MOVING_AVERAGE_DECAY)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())

        with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
            train_op = tf.no_op(name='train')
        return train_op
def train(total_loss, global_step):
    num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / FLAGS.batch_size
    decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)

    lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE, global_step, decay_steps, LEARNING_RATE_DECAY_FACTOR, staircase=True)
    tf.scalar_summary("learning_rate", lr)

    loss_averages_op = _add_loss_summaries(total_loss)

    with tf.control_dependencies([loss_averages_op]):
        opt = tf.train.GradientDescentOptimizer(lr)
        grads = opt.compute_gradients(total_loss)

    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    for var in tf.trainable_variables():
        tf.histogram_summary(var.op.name, var)

    for grad, var in grads:
        if grad is not None:
            tf.histogram_summary(var.op.name + "/gradients", grad)

    #variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    #variables_averages_op = variable_averages.apply(tf.trainable_variables())

    with tf.control_dependencies([apply_gradient_op]):
        train_op = tf.no_op(name="train")

    return train_op
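Roughly how a train op built this way is driven. The stand-in loss below is hypothetical; the real total_loss, the learning-rate constants, FLAGS.batch_size and _add_loss_summaries() all come from the surrounding project.

# Hypothetical stand-in loss registered in the 'losses' collection,
# which _add_loss_summaries() conventionally reads.
w = tf.Variable(0.5, name='w')
total_loss = tf.square(w - 3.0, name='total_loss')
tf.add_to_collection('losses', total_loss)

global_step = tf.Variable(0, trainable=False, name='global_step')
train_op = train(total_loss, global_step)

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    _, step = sess.run([train_op, global_step])   # one descent step per call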
Example #8
def _add_gradients_summaries(grads_and_vars):
  """Add histogram summaries to gradients.

  Note: The summaries are also added to the SUMMARIES collection.

  Args:
    grads_and_vars: A list of gradient to variable pairs (tuples).

  Returns:
    The _list_ of the added summaries for grads_and_vars.
  """
  summaries = []
  for grad, var in grads_and_vars:
    if grad is not None:
      if isinstance(grad, tf.IndexedSlices):
        grad_values = grad.values
      else:
        grad_values = grad
      summaries.append(tf.histogram_summary(var.op.name + ':gradient',
                                            grad_values))
      summaries.append(tf.histogram_summary(var.op.name + ':gradient_norm',
                                            tf.global_norm([grad_values])))
    else:
      tf.logging.info('Var %s has no gradient', var.op.name)
  return summaries
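A hedged sketch of the usual call pattern: compute the gradients first, pass the (gradient, variable) pairs through the helper to record histograms, then apply them; the toy loss is only for illustration.

w = tf.Variable(tf.zeros([10]), name='w')
toy_loss = tf.reduce_sum(tf.square(w - 1.0))

opt = tf.train.GradientDescentOptimizer(0.1)
grads_and_vars = opt.compute_gradients(toy_loss)
_add_gradients_summaries(grads_and_vars)         # histograms of each gradient and its norm
train_op = opt.apply_gradients(grads_and_vars)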
Example #9
File: Model.py Project: amharc/jnp3
    def pool_layer(self, input_, ksize, stride, name):
        with tf.variable_scope(name):
            pooled = self.max_pool(input_, ksize, stride, name=name)

            tf.histogram_summary(name + "/pooled", pooled)

            return pooled
Example #10
    def __init__(self, config):
        self.config = config

        self.input = tf.placeholder('int32', [self.config.batch_size, config.max_seq_len], name='input')
        self.labels = tf.placeholder('int64', [self.config.batch_size], name='labels')
        self.labels_one_hot = tf.one_hot(indices=self.labels,
                                         depth=config.output_dim,
                                         on_value=1.0,
                                         off_value=0.0,
                                         axis=-1)

        self.gru = GRUCell(config.hidden_state_dim)

        embeddings_we = tf.get_variable('word_embeddings', initializer=tf.random_uniform([config.vocab_size, config.embedding_dim], -1.0, 1.0))
        self.emb = embed_input = tf.nn.embedding_lookup(embeddings_we, self.input)
        inputs = [tf.squeeze(i, squeeze_dims=[1]) for i in tf.split(1, config.max_seq_len, embed_input)]

        outputs, last_slu_state = tf.nn.rnn(
            cell=self.gru,
            inputs=inputs,
            dtype=tf.float32,)

        w_project = tf.get_variable('project2labels', initializer=tf.random_uniform([config.hidden_state_dim, config.output_dim], -1.0, 1.0))
        self.logits = logits_bo = tf.matmul(last_slu_state, w_project)
        tf.histogram_summary('logits', logits_bo)
        self.probabilities = tf.nn.softmax(logits_bo)
        self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits_bo, self.labels_one_hot))
        self.predict = tf.nn.softmax(logits_bo)

        # TensorBoard
        self.accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(self.predict, 1), self.labels), 'float32'), name='accuracy')
        tf.scalar_summary('CCE loss', self.loss)
        tf.scalar_summary('Accuracy', self.accuracy)
        self.tb_info = tf.merge_all_summaries()
Example #11
def inference(images):
    """
    Build the MNIST model
    """

    # Hidden 1
    with tf.name_scope('hidden1'):
        weights = tf.Variable(
            tf.truncated_normal([IMAGE_PIXELS, LAYER_SIZE],
                                stddev= 1.0 / math.sqrt(float(IMAGE_PIXELS))),
        name='weights')
        biases = tf.Variable(tf.zeros([LAYER_SIZE]),
                             name='biases')
        hidden1 = tf.nn.relu(tf.matmul(images, weights) + biases)
        # Add summary ops to collect data
        tf.histogram_summary('weights', weights)
        tf.histogram_summary('biases', biases)

    # Output layer: a linear transform (with its own weights and biases, defined below) followed by log-softmax
    with tf.name_scope('softmax_linear'):
        weights = tf.Variable(
            tf.truncated_normal([LAYER_SIZE, NUM_CLASSES],
                                stddev=1.0 / math.sqrt(float(LAYER_SIZE))),
            name='weights')
        biases = tf.Variable(tf.zeros([NUM_CLASSES]),
                             name='biases')
        logits = logSoftMax(tf.matmul(hidden1, weights) + biases)
        return logits
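A brief sketch of wiring the inference graph, assuming the module's IMAGE_PIXELS, LAYER_SIZE, NUM_CLASSES constants and its logSoftMax helper; the placeholder is illustrative only.

images_placeholder = tf.placeholder(tf.float32, shape=[None, IMAGE_PIXELS], name='images')
logits = inference(images_placeholder)
# Because inference() already applies a log-softmax, a negative log-likelihood
# of the true class is the natural loss to attach next.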
Example #12
def train(total_loss, global_step, learning_rate=INITIAL_LEARNING_RATE):
  lr = tf.train.exponential_decay(learning_rate,
                                  global_step,
                                  DECAY_STEPS,#number of steps required for it to decay
                                  LEARNING_RATE_DECAY_FACTOR,
                                  staircase=True)

  tf.scalar_summary('learning_rate', lr)

  #compute gradient step
  with tf.control_dependencies([total_loss]):
    opt = tf.train.MomentumOptimizer(lr, momentum=0.95)
    grads = opt.compute_gradients(total_loss)

  #if we wanted to clip the gradients
  #would apply the operation here

  #apply the gradients
  apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

  for grad, var in grads:
    if grad is not None:
      print("Found gradients for: ", var.op.name)
      tf.histogram_summary(var.op.name + "/gradients", grad)

  with tf.control_dependencies([apply_gradient_op]):
    train_op = tf.no_op(name="train")

  #opt = tf.train.GradientDescentOptimizer(lr).minimize(total_loss, global_step=global_step)
  # grads = opt.compute_gradients(total_loss)

  return train_op
Example #13
    def inference(self, images, z):
        print "="*100
        print "images DCGAN inference:"
        print images.get_shape()
        print "="*100

        self.z_sum = tf.histogram_summary("z", z)

        # Generative
        print "generative"
        self.generator = Generative()
        self.G = self.generator.inference(z)

        # Discriminative
        print "discriminative from images"
        self.discriminator = Discriminative()
        self.D, self.D_logits = self.discriminator.inference(images)

        print "discriminative for sample from noize"
        self.sampler = self.generator.sampler(z)
        self.D_, self.D_logits_ = self.discriminator.inference(self.G, reuse=True)

        self.d_sum = tf.histogram_summary("d", self.D)
        self.d__sum = tf.histogram_summary("d_", self.D_)
        self.G_sum = tf.image_summary("G", self.G)

        return images, self.D_logits, self.D_logits_, self.G_sum, self.z_sum, self.d_sum, self.d__sum
Example #14
	def conv_layer(input, filter_shape, strides=[1, 1, 1, 1], keep_prob=1):
		""" Adds a convolutional layer to the graph. 
	
		Creates filters and biases, computes the convolutions, passes the output
		through a leaky ReLU activation function and applies dropout. Equivalent
		to calling conv_op()->leaky_relu()->dropout().

		Args:
			input: A tensor of floats with shape [batch_size, input_height,
				input_width, input_depth]. The input volume.
			filter_shape: A list of 4 integers with shape [filter_height, 
			filter_width, input_depth, output_depth]. This determines the size
			and number of filters of the convolution.
			strides: A list of 4 integers. The amount of stride in the four
				dimensions of the input.
			keep_prob: A float. Probability of dropout in the layer.
			
		Returns:
			A tensor of floats with shape [batch_size, output_height,
			output_width, output_depth]. The product of the convolutional layer.
		"""
		# conv -> relu -> dropout
		conv = conv_op(input, filter_shape, strides) 
		relu = leaky_relu(conv)
		output = dropout(relu, keep_prob)
		
		# Summarize activations
		scope = tf.get_default_graph()._name_stack # No easier way
		tf.histogram_summary(scope + '/activations', output)
		
		return output
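For orientation, a rough stack of two such layers; conv_op, leaky_relu and dropout are the project's own helpers, and the shapes below are placeholders.

images = tf.placeholder(tf.float32, [None, 28, 28, 1], name='images')
conv1 = conv_layer(images, filter_shape=[3, 3, 1, 16], keep_prob=0.9)   # 3x3, 1 -> 16 channels
conv2 = conv_layer(conv1, filter_shape=[3, 3, 16, 32])                  # defaults: stride 1, no dropout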
Example #15
def train(loss, learning_rate):
	""" Sets up an ADAM optimizer, computes gradients and updates variables.
	
	Args:
		loss: A float. The loss function to minimize.
		learning_rate: A float. The learning rate for ADAM.
	
	Returns:
		train_op: The operation to run for training.
		global_step: The current number of training steps made by the optimizer.
	"""
	# Set optimization parameters
	global_step = tf.Variable(0, name='global_step', trainable=False)
	optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, 
									   beta2=0.995, epsilon=1e-06)
	
	# Compute and apply gradients		   
	gradients = optimizer.compute_gradients(loss)
	train_op = optimizer.apply_gradients(gradients, global_step=global_step)
	
	# Summarize gradients
	for gradient, variable in gradients:
		if gradient is not None:
			tf.histogram_summary(variable.op.name + '/gradients', gradient)

	return train_op, global_step
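A minimal sketch of using this trainer; the toy loss with one trainable variable is an assumption made only so the call is runnable.

w = tf.Variable(2.0, name='w')
toy_loss = tf.square(w - 5.0)

train_op, global_step = train(toy_loss, learning_rate=1e-3)

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    for _ in range(100):
        sess.run(train_op)   # one Adam update per call; global_step increments each time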
Example #16
def nn_layer(input_tensor, input_dim, output_dim, layer_name, act = tf.tanh):
    """
    Creates and returns NN layer
    input_tensor -- TF tensor at layer input
    input_dim -- size of layer input
    output_dim -- size of layer output
    layer_name -- name of the layer for summaries (statistics)
    act -- nonlinear activation function
    """
    with tf.name_scope(layer_name):
        with tf.name_scope('weights'):
            weights = weight_variable(layer_name+'/weights',[input_dim, output_dim])
            variable_summaries(weights, layer_name+'/weights')
        with tf.name_scope('biases'):
            biases = bias_variable(layer_name+'/biases',[output_dim])
            variable_summaries(biases, layer_name+'/biases')
        with tf.name_scope('WX_plus_b'):
            preactivate = tf.matmul(input_tensor, weights)+biases
            tf.histogram_summary(layer_name+'/pre_activations', preactivate)
        if act is not None:
            activations = act(preactivate, 'activation')
        else:
            activations = preactivate
        tf.histogram_summary(layer_name+'/activations', activations)
    return activations
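A brief usage sketch, assuming the project's weight_variable, bias_variable and variable_summaries helpers; a two-layer MLP over MNIST-sized input might be assembled like this.

x = tf.placeholder(tf.float32, [None, 784], name='x')
hidden = nn_layer(x, 784, 500, 'layer1')                 # tanh activation by default
logits = nn_layer(hidden, 500, 10, 'layer2', act=None)   # act=None leaves the output linear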
    def expectation_maximization_step(self, x):
        
        # probability of emission sequence
        obs_prob_seq = tf.gather(self.E, x)

        with tf.name_scope('Forward_Backward'):
            self.forward_backward(obs_prob_seq)

        with tf.name_scope('Re_estimate_transition'):
            new_T0, new_transition = self.re_estimate_transition(x)
        
        with tf.name_scope('Re_estimate_emission'):
            new_emission = self.re_estimate_emission(x)

        with tf.name_scope('Check_Convergence'):
            converged = self.check_convergence(new_T0, new_transition, new_emission)

        with tf.name_scope('Update_parameters'):
            self.T0 = tf.assign(self.T0, new_T0)
            self.E = tf.assign(self.E, new_emission)
            self.T = tf.assign(self.T, new_transition)
            #self.count = tf.assign_add(self.count, 1)
             
            with tf.name_scope('histogram_summary'):
                _ = tf.histogram_summary(self.T0.name, self.T0)
                _ = tf.histogram_summary(self.T.name, self.T)
                _ = tf.histogram_summary(self.E.name, self.E)
        return converged
Example #18
  def __init__(self):
    self.state_batch_placeholder = tf.placeholder(
        tf.float32, shape=(None, NUM_TILES))
    self.targets_placeholder = tf.placeholder(tf.float32, shape=(None,))
    self.actions_placeholder = tf.placeholder(tf.int32, shape=(None,))
    self.placeholders = (self.state_batch_placeholder,
                         self.targets_placeholder,
                         self.actions_placeholder)

    self.weights, self.biases, self.activations = build_inference_graph(
        self.state_batch_placeholder, HIDDEN_SIZES)
    self.q_values = self.activations[-1]
    self.loss = build_loss(self.q_values, self.targets_placeholder,
                     self.actions_placeholder)
    self.train_op, self.global_step, self.learning_rate = (
        build_train_op(self.loss))

    tf.scalar_summary("Average Target",
                      tf.reduce_mean(self.targets_placeholder))
    tf.scalar_summary("Learning Rate", self.learning_rate)
    tf.scalar_summary("Loss", self.loss)
    tf.histogram_summary("States", self.state_batch_placeholder)
    tf.histogram_summary("Targets", self.targets_placeholder)

    self.init = tf.initialize_all_variables()
    self.summary_op = tf.merge_all_summaries()
Example #19
    def add_latent(self, name,
                   init_mean=None,
                   init_stddev=1e-6,
                   transform=None,
                   shape=None,
                   point_estimate=False):
        if init_mean is None:
            init_mean = np.random.randn()
            
        with tf.name_scope("latent_" + name) as scope:
            latent = {}
            latent["q_mean"] = tf.Variable(init_mean, name="q_mean")
            if point_estimate:
                latent["q_stddev"] = None
                latent["q_entropy"] = 0.0
            else:
                latent["q_stddev"] = tf.Variable(init_stddev, name="q_stddev")
                latent["q_entropy"] = dists.gaussian_entropy(stddev=latent["q_stddev"])
                
            latent["transform"] = transform

            # TODO: infer shape, and make sure that
            #       shapes of q_mean and q_stddev match
            #if shape is None:
            #    shape = _infer_shape(init_mean, init_stddev)
            latent["shape"] = shape

            tf.histogram_summary("latent_%s/q_mean" % name, latent["q_mean"])

            if not point_estimate:
                tf.histogram_summary("latent_%s/q_stddev" % name, latent["q_stddev"])

        self.latents[name] = latent
Example #20
File: model.py Project: kkihara/GAN
def deconv_layer(input_tensor, mode_tensor, weight_init, filter_size,
                 filter_stride, num_filters, in_channels, output_size,
                 nonlinear_func, use_batchnorm, name):
    # Initialize variables
    weight_shape = [filter_size, filter_size, num_filters, in_channels]
    initializer = tf.random_normal_initializer(stddev=weight_init)
    deconv_weights = tf.get_variable(name + '/weights',
                                     shape=weight_shape,
                                     initializer=initializer)
    bias = tf.get_variable(name + '/bias',
                           shape=[num_filters],
                           initializer=tf.constant_initializer())

    # Apply deconvolution
    output_shape = [FLAGS.batch_size, output_size, output_size, num_filters]
    stride = [1, filter_stride, filter_stride, 1]
    deconv = tf.nn.conv2d_transpose(input_tensor, deconv_weights, output_shape,
                                    stride, padding='SAME',
                                    name=name + '/deconv')
    deconv = tf.nn.bias_add(deconv, bias, name=name + '/deconv_bias')
    # Apply batchnorm
    if use_batchnorm:
        deconv = batch_norm(deconv, num_filters,
                            tf.equal(mode_tensor, 'train'),
                            name + '/bn')

    activation = nonlinear_func(tf.nn.bias_add(deconv, bias),
                                name=name + '/activation')

    if not tf.get_variable_scope().reuse:
        tf.histogram_summary('summary/weights/' + name, deconv_weights)
        tf.histogram_summary('summary/activations/' + name, activation)
    return activation
Example #21
def dcnn_layer(input_tensor, convolution_shape,output_shape, padding, layer_name, config, act=tf.nn.relu,strides=[1,1,1,1]):
    # Adding a name scope ensures logical grouping of the layers in the graph.
    with tf.name_scope(layer_name):
        # This Variable will hold the state of the weights for the layer
        with tf.name_scope('weights'):
            name = layer_name + '/weights'
            weights = weight_variable(convolution_shape, name, config)
            variable_summaries(weights, name)
        with tf.name_scope('biases'):
            biases = bias_variable([convolution_shape[-1]], config)
            variable_summaries(biases, layer_name + '/biases')
        with tf.name_scope('convolution'):
            preactivate = conv2d_transpose(input_tensor, weights,output_shape, padding=padding,strides=strides) + biases
            tf.histogram_summary(layer_name + '/pre_activations', preactivate)

        # Combine the feature maps if this is the last deconvolution
        if output_shape[-1] == 1:
            activations = act(tf.reduce_mean(preactivate,3,keep_dims=True), 'activation')
        else:
            activations = act(preactivate,'activation')

        tf.histogram_summary(layer_name + '/activations', activations)

        print layer_name + ' Shape: ', weights.get_shape(), ' with bias ', biases.get_shape(), ' padding', padding
        shape = activations.get_shape()
        print ' output : ', shape

        return activations, weights
Example #22
def conv_layer(input_tensor, input_dim, output_dim, layer_name, act=tf.nn.relu, fully_connected=False):
    """
    Makes a simple convolutional layer based on input and output dimensions.

    input_tensor: A tensor of the input data from the previous layer (of shape [a, b, c, d])

    Returns the pooled tensor after CONV -> ACT -> POOL
    """
    with tf.name_scope(layer_name):
        with tf.name_scope("weights"):
            weights = weight_variable([input_dim, output_dim]) if fully_connected else weight_variable([5, 5, input_dim, output_dim])
            variable_summaries(weights, layer_name + '/weights')
        with tf.name_scope("biases"):
            bias = bias_variable([output_dim])
            variable_summaries(bias, layer_name + '/bias')
        if fully_connected:
            with tf.name_scope("fully_connected"):
                final = act(tf.matmul(input_tensor, weights) + bias)
                tf.histogram_summary(layer_name + '/fully_connected', final)
                return final
        else:
            with tf.name_scope("convolution"):
                convolution = act(conv2d(input_tensor, weights) + bias)
                tf.histogram_summary(layer_name + '/convolution', convolution)
                pooled = max_pool_2x2(convolution)
                return pooled
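A sketch of stacking this layer, assuming the weight_variable, bias_variable, conv2d and max_pool_2x2 helpers from the same project and NHWC image input; the MNIST-like shapes are placeholders.

x_image = tf.placeholder(tf.float32, [None, 28, 28, 1], name='x_image')
pool1 = conv_layer(x_image, 1, 32, 'conv1')              # 5x5 conv, 1 -> 32 channels, then 2x2 pool
pool2 = conv_layer(pool1, 32, 64, 'conv2')               # 28x28 -> 14x14 -> 7x7 after the two pools
flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
fc1 = conv_layer(flat, 7 * 7 * 64, 1024, 'fc1', fully_connected=True)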
Example #23
def _deconv(inpOp, kH, kW, nOut, dH=1, dW=1, relu=True, name=None):
    global deconv_counter
    global parameters
    if not name:
      name = 'deconv' + str(deconv_counter)
    deconv_counter += 1
    with tf.variable_scope(name) as scope:
        nIn = int(inpOp.get_shape()[-1])
        in_shape = inpOp.get_shape()
        stddev = 1e-3
        kernel = tf.get_variable('weights',[kH, kW, nOut, nIn], initializer=tf.random_normal_initializer(stddev=(kH*kW*nIn)**0.5*stddev))
        
        conv = tf.nn.deconv2d(inpOp, kernel, [int(in_shape[0]),int(in_shape[1]),int(in_shape[2]),nOut], [1, 1, 1, 1],
                         padding="SAME")
                         
        biases = tf.get_variable('biases', [nOut], initializer=tf.constant_initializer(value=0.0))
        bias = tf.reshape(tf.nn.bias_add(conv, biases), conv.get_shape())
        if relu:
          bias = tf.nn.relu(bias, name='relu')
        #parameters += [kernel, biases]
        #bias = tf.Print(bias, [tf.sqrt(tf.reduce_mean(tf.square(inpOp - tf.reduce_mean(inpOp))))], message=kernel.name)
        tf.histogram_summary(bias.name+"/output", bias)
        tf.image_summary(bias.name+"/output", bias[:,:,:,0:3])
        #tf.image_summary(scope+"/depth_weight", depthwise_filter)
        # tf.image_summary(scope+"/point_weight", pointwise_filter)
        
        return bias
Example #24
 def dense(self, width=100, act=tf.nn.relu):
     """
     Fully connected layer.
     It applies a matrix multiply and a bias add, then a nonlinearity (ReLU by default).
     """
     input_tensor = self.layers[-1]["activations"]
     layer_name = "dense" + str(len([l for l in self.layers
         if l["type"]=="dense"]))
     input_dim = functools.reduce(operator.mul, input_tensor.get_shape()[1:].as_list(), 1)
     input_tensor = tf.reshape(input_tensor, (-1, input_dim))
     # Adding a name scope ensures logical grouping of the layers in the graph.
     with tf.name_scope(layer_name):
         # This Variable will hold the state of the weights for the layer
         with tf.name_scope('weights'):
             weights = weight_variable([input_dim, width])
             variable_summaries(weights, layer_name + '/weights')
         with tf.name_scope('biases'):
             biases = bias_variable([width])
             variable_summaries(biases, layer_name + '/biases')
         with tf.name_scope('Wx_plus_b'):
             preactivate = tf.matmul(input_tensor, weights) + biases
             activations = act(preactivate, 'activation')
             tf.histogram_summary(layer_name + '/activations', activations)
     self.layers.append( {
         "activations": activations,
         "weights": weights,
         "biases": biases,
         "type": "dense"
         } )
     return self
Example #25
def _conv(inpOp, kH, kW, nOut, dH=1, dW=1, relu=True):
    global conv_counter
    global parameters
    name = 'conv' + str(conv_counter)
    conv_counter += 1
    with tf.name_scope(name) as scope:
        nIn = int(inpOp.get_shape()[-1])
        stddev = 5e-3
        kernel = tf.Variable(tf.truncated_normal([kH, kW, nIn, nOut],
                                                 dtype=tf.float32,
                                                 stddev=(kH*kW*nIn)**0.5*stddev), name='weights')
        
        conv = tf.nn.conv2d(inpOp, kernel, [1, 1, 1, 1],
                         padding="SAME")

        biases = tf.Variable(tf.constant(0.0, shape=[nOut], dtype=tf.float32),
                             trainable=True, name='biases')
        bias = tf.reshape(tf.nn.bias_add(conv, biases), conv.get_shape())
        if relu:
          bias = tf.nn.relu(bias, name=scope)
        #parameters += [kernel, biases]
        #bias = tf.Print(bias, [tf.sqrt(tf.reduce_mean(tf.square(inpOp - tf.reduce_mean(inpOp))))], message=kernel.name)
        tf.histogram_summary(scope+"/output", bias)
        tf.image_summary(scope+"/output", bias[:,:,:,0:3])
        tf.image_summary(scope+"/kernel_weight", tf.expand_dims(kernel[:,:,0:3,0], 0))
        # tf.image_summary(scope+"/point_weight", pointwise_filter)
        
        return bias
Example #26
File: dqn.py Project: danfeiX/drl
def train(lr, total_loss, global_step):
    # Variables that affect learning rate.

    # Compute gradients.
    #with tf.control_dependencies([loss_averages_op]):
    opt = tf.train.GradientDescentOptimizer(lr)
    grads = opt.compute_gradients(total_loss)

    # Add histograms for gradients.
    for i, (grad, var) in enumerate(grads):
        if grad is not None:
            tf.histogram_summary(var.op.name + '/gradients', grad)
            grads[i] = (tf.clip_by_norm(grad, 5), var)

    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    # Add histograms for trainable variables.
    for var in tf.trainable_variables():
        tf.histogram_summary(var.op.name, var)

    # Track the moving averages of all trainable variables.
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
        train_op = tf.no_op(name='train')

    return train_op
Example #27
    def bn(self, act=tf.nn.relu):
        """
        Batch normalization.
        See: http://arxiv.org/pdf/1502.03167v3.pdf
        Based on implementation found at: 
        http://www.r2rt.com/posts/implementations/2016-03-29-implementing-batch-normalization-tensorflow/
        """
        # Adding a name scope ensures logical grouping of the layers in the graph.

        layer_name = "bn" + str(len([l for l in self.layers
            if l["type"]=="bn"]))

        input_tensor = self.layers[-1]["activations"]
        
        with tf.name_scope(layer_name):
            
            dim = input_tensor.get_shape()[1:] # 64, 1, 10, 100
            
            beta = tf.Variable(tf.zeros(dim))
            scale = tf.Variable(tf.ones(dim))
            variable_summaries(beta, layer_name + "/beta")
            variable_summaries(scale, layer_name + "/scale")
            z = input_tensor
            batch_mean, batch_var = tf.nn.moments(input_tensor,[0])
            epsilon = 1e-3
            z_hat = (z - batch_mean) / tf.sqrt(batch_var + epsilon)
            bn_z = scale * z_hat + beta
            activations = act(bn_z, 'activation')
            tf.histogram_summary(layer_name + '/activations', activations)
              
        self.layers.append({
            "activations": activations,
            "type": "bn"})
        return self
Example #28
def run_training(cost_threshold=FLAGS.cost_threshold, max_steps=FLAGS.max_steps):
  global setup_done
  cost_value = 1e9
  accuracy_value = 0.0
  # if setup_done is False:
  setup_done = True
  opt = tf.train.AdamOptimizer()
  # try:
  #opt = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
  i_trains = [s.idx for s in trains]
  i_valids = [s.idx for s in valids]
  i_tests = [s.idx for s in tests]
  i_all = [s.idx for s in sentences]
  logits = batch_logits(i_ss, activations.ref())
  labs = batch_labels(i_ss)
  loss = calc_loss(logits, labs)
  i_ss_accuracy = accuracy(logits, labs)
  #v_labs = batch_labels(valid_ss)
  #v_logits = batch_logits(valid_ss, activations.ref())
  #v_loss = calc_loss(v_logits, v_labs)
  #train_accuracy = accuracy(logits, labs)
  #valid_accuracy = accuracy(v_logits, v_labs)
  # test_accuracy = accuracy(i_tests, activations.ref())
  train_op = opt.minimize(loss)
  #tf.histogram_summary('activations', activations)
  tf.histogram_summary('samples', i_ss)
  tf.scalar_summary('loss', loss)
  #tf.scalar_summary('training accuracy', train_accuracy)
  tf.scalar_summary('validation accuracy', i_ss_accuracy)
  # tf.scalar_summary('test accuracy', test_accuracy)
  merged = tf.merge_all_summaries()
  sess.run(tf.initialize_all_variables())
  writer = tf.train.SummaryWriter(
      '/Users/rgobbel/src/pymisc/rntn_tf/tf_logs', sess.graph)
  # except Exception as exc:
  #     print('Exception: {0}'.format(exc))
  # setup_done = False
  f_dict[i_ss] = random.sample(i_trains, FLAGS.batch_size)
  _, cost_value = sess.run([train_op, loss], feed_dict=f_dict)
  #f_dict[valid_ss] = i_valids
  _ = sess.run(zero_activations(activations.ref()), feed_dict=f_dict)
  print('starting')
  accuracy_value = sess.run([i_ss_accuracy], feed_dict=f_dict)
  for step in range(max_steps):
    #_ = sess.run(zero_activations(activations.ref()), feed_dict=f_dict)
    f_dict[i_ss] = random.sample(i_trains, FLAGS.batch_size)
    #logits = batch_logits(i_ss, activations.ref())
    #labs = batch_labels(i_ss)
    _, _, cost_value = sess.run([tf.pack([i_ss]), train_op, loss], feed_dict=f_dict)
    #_ = sess.run(zero_activations(activations.ref()), feed_dict=f_dict)
    f_dict[i_ss] = i_valids
    _, valid_accuracy_value = sess.run([loss, i_ss_accuracy], feed_dict=f_dict)
    (summ,) = sess.run([merged], feed_dict=f_dict)
    # summ = sess.run([merged], feed_dict=f_dict)
    writer.add_summary(summ, step)
    writer.flush()
    print('.', end='', flush=True)
    if cost_value < cost_threshold:
      return step, cost_value, valid_accuracy_value
  return max_steps, cost_value, valid_accuracy_value
Example #29
def conv_nn_layer(input_tensor, window_width, window_height, input_dim, 
                  output_dim, layer_name, act=tf.nn.relu):
    """
    Defines a convolutional neural network layer
    """
    # Adding a name scope ensures logical grouping of the layers in the graph.
    with tf.name_scope(layer_name):
        # Define layer weights
        with tf.name_scope('weights'):
            weights = weight_variable([window_width, window_height, 
                                       input_dim, output_dim])
            variable_summaries(weights, layer_name + '/weights')
            
        # Define biases
        with tf.name_scope('biases'):
            biases = bias_variable([output_dim])
            variable_summaries(biases, layer_name + '/biases')
            
        # Convolve weights on image
        with tf.name_scope('preactivation'):
            preactivate = conv2d(input_tensor, weights) + biases
            tf.histogram_summary(layer_name + '/pre_activations', preactivate)
            
        # Determine layer activation
        activations = act(preactivate, 'activation')
        tf.histogram_summary(layer_name + '/activations', activations)
        return activations
Example #30
 def _process(self, grads):
     for grad, var in grads:
         tf.histogram_summary(var.op.name + '/grad', grad)
         tf.add_to_collection(MOVING_SUMMARY_VARS_KEY,
                              tf.sqrt(tf.reduce_mean(tf.square(grad)),
                                      name=var.op.name + '/gradRMS'))
     return grads
Example #31
for i in range(n_samples):
    x_i_encode = make_conv_net(x_i[:, i, :, :, :], scope, tie or i > 0,
                               not x_i_learn)
    x_i_inv_mag = tf.rsqrt(
        tf.clip_by_value(
            tf.reduce_sum(tf.square(x_i_encode), 1, keep_dims=True), eps,
            float("inf")))
    dotted = tf.squeeze(
        tf.batch_matmul(tf.expand_dims(x_hat_encode, 1),
                        tf.expand_dims(x_i_encode, 2)), [
                            1,
                        ])
    cos_sim_list.append(dotted * x_i_inv_mag)
    #*x_hat_inv_mag
cos_sim = tf.concat(1, cos_sim_list)
tf.histogram_summary('cos sim', cos_sim)
weighting = tf.nn.softmax(cos_sim)
label_prob = tf.squeeze(tf.batch_matmul(tf.expand_dims(weighting, 1), y_i))
tf.histogram_summary('label prob', label_prob)

top_k = tf.nn.in_top_k(label_prob, y_hat_ind, 1)
acc = tf.reduce_mean(tf.to_float(top_k))
tf.scalar_summary('train avg accuracy', acc)
correct_prob = tf.reduce_sum(
    tf.log(tf.clip_by_value(label_prob, eps, 1.0)) * y_hat, 1)
loss = tf.reduce_mean(-correct_prob, 0)
tf.scalar_summary('loss', loss)
optim = tf.train.GradientDescentOptimizer(learning_rate)
#optim = tf.train.AdamOptimizer(learning_rate)
grads = optim.compute_gradients(loss)
grad_summaries = [
def main(_):
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        ######################
        # Config model_deploy#
        ######################
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=FLAGS.task,
            num_replicas=FLAGS.worker_replicas,
            num_ps_tasks=FLAGS.num_ps_tasks)

        # Create global_step
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        ######################
        # Select the dataset #
        ######################
        #dataset = dataset_factory.get_dataset(
        #FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)
        dataset = get_train_data.get_dataset(FLAGS.dataset_name,
                                             FLAGS.dataset_split_name,
                                             FLAGS.dataset_dir)
        ####################
        # Select the network #
        ####################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            weight_decay=FLAGS.weight_decay,
            is_training=True)

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=True)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        with tf.device(deploy_config.inputs_device()):
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                num_readers=FLAGS.num_readers,
                common_queue_capacity=20 * FLAGS.batch_size,
                common_queue_min=10 * FLAGS.batch_size)
            [image, label] = provider.get(['image', 'label'])
            label -= FLAGS.labels_offset

            #train_image_size = FLAGS.train_image_size or network_fn.default_image_size

            #image = image_preprocessing_fn(image, train_image_size, train_image_size)
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)

            images, labels = tf.train.batch(
                [image, label],
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)
            labels = slim.one_hot_encoding(
                labels, dataset.num_classes - FLAGS.labels_offset)
            batch_queue = slim.prefetch_queue.prefetch_queue(
                [images, labels], capacity=2 * deploy_config.num_clones)

        ####################
        # Define the model #
        ####################
        def clone_fn(batch_queue):
            """Allows data parallelism by creating multiple clones of network_fn."""
            images, labels = batch_queue.dequeue()
            logits, end_points = network_fn(images)

            #############################
            # Specify the loss function #
            #############################
            if 'AuxLogits' in end_points:
                slim.losses.softmax_cross_entropy(
                    end_points['AuxLogits'],
                    labels,
                    label_smoothing=FLAGS.label_smoothing,
                    weight=0.4,
                    scope='aux_loss')
            slim.losses.softmax_cross_entropy(
                logits,
                labels,
                label_smoothing=FLAGS.label_smoothing,
                weight=1.0)
            return end_points

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        clones = model_deploy.create_clones(deploy_config, clone_fn,
                                            [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)
        # Gather update_ops from the first clone. These contain, for example,
        # the updates for the batch_norm variables created by network_fn.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                       first_clone_scope)

        # Add summaries for end_points.
        end_points = clones[0].outputs
        for end_point in end_points:
            x = end_points[end_point]
            summaries.add(tf.histogram_summary('activations/' + end_point, x))
            summaries.add(
                tf.scalar_summary('sparsity/' + end_point,
                                  tf.nn.zero_fraction(x)))

        # Add summaries for losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.scalar_summary('losses/%s' % loss.op.name, loss))

        # Add summaries for variables.
        for variable in slim.get_model_variables():
            summaries.add(tf.histogram_summary(variable.op.name, variable))

        #################################
        # Configure the moving averages #
        #################################
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)
        else:
            moving_average_variables, variable_averages = None, None

        #########################################
        # Configure the optimization procedure. #
        #########################################
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = _configure_learning_rate(dataset.num_samples,
                                                     global_step)
            optimizer = _configure_optimizer(learning_rate)
            summaries.add(
                tf.scalar_summary('learning_rate',
                                  learning_rate,
                                  name='learning_rate'))

        if FLAGS.sync_replicas:
            # If sync_replicas is enabled, the averaging will be done in the chief
            # queue runner.
            optimizer = tf.train.SyncReplicasOptimizer(
                opt=optimizer,
                replicas_to_aggregate=FLAGS.replicas_to_aggregate,
                variable_averages=variable_averages,
                variables_to_average=moving_average_variables,
                replica_id=tf.constant(FLAGS.task, tf.int32, shape=()),
                total_num_replicas=FLAGS.worker_replicas)
        elif FLAGS.moving_average_decay:
            # Update ops executed locally by trainer.
            update_ops.append(
                variable_averages.apply(moving_average_variables))

        # Variables to train.
        variables_to_train = _get_variables_to_train()

        #  and returns a train_tensor and summary_op
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones, optimizer, var_list=variables_to_train)
        # Add total_loss to summary.
        summaries.add(
            tf.scalar_summary('total_loss', total_loss, name='total_loss'))

        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)

        update_op = tf.group(*update_ops)
        train_tensor = control_flow_ops.with_dependencies([update_op],
                                                          total_loss,
                                                          name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or _gather_clone_loss().
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        # Merge all summaries together.
        summary_op = tf.merge_summary(list(summaries), name='summary_op')

        ###########################
        # Kicks off the training. #
        ###########################
        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_dir,
            master=FLAGS.master,
            is_chief=(FLAGS.task == 0),
            init_fn=_get_init_fn(),
            summary_op=summary_op,
            number_of_steps=FLAGS.max_number_of_steps,
            log_every_n_steps=FLAGS.log_every_n_steps,
            save_summaries_secs=FLAGS.save_summaries_secs,
            save_interval_secs=FLAGS.save_interval_secs,
            sync_optimizer=optimizer if FLAGS.sync_replicas else None)
        optimizer = tf.train.AdamOptimizer(0.001)

        grads_and_vars = optimizer.compute_gradients(model.loss)

        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)

        # Keep track of gradient values and sparsity (optional)

        grad_summaries = []

        for g, v in grads_and_vars:

            if g is not None:
                grad_hist_summary = tf.histogram_summary(
                    "{}/grad/hist".format(v.name), g)

                sparsity_summary = tf.scalar_summary(
                    "{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))

                grad_summaries.append(grad_hist_summary)

                grad_summaries.append(sparsity_summary)

        grad_summaries_merged = tf.merge_summary(grad_summaries)

        timestamp = str(int(time.time()))

        out_dir = os.path.abspath(
            os.path.join(os.path.curdir, "runs", timestamp))
    def buildModel(self, inMatFilename = None):
        if(inMatFilename):
            npWeights = loadWeights(inMatFilename)

        #Put all conv layers on gpu
        with tf.device('gpu:0'):
            with tf.name_scope("inputOps"):
                inputShape = self.dataObj.inputShape
                #Get convolution variables as placeholders
                self.inputImage = node_variable([None, inputShape[0], inputShape[1], inputShape[2]], "inputImage")
                self.gt = node_variable([None, 1], "gt")
                #Model variables for convolutions

            with tf.name_scope("Conv1Ops"):
                if(inMatFilename):
                    self.W_conv1 = weight_variable_fromnp(npWeights["conv1_w"], "w_conv1")
                    self.B_conv1 = weight_variable_fromnp(npWeights["conv1_b"], "b_conv1")
                else:
                    self.W_conv1 = weight_variable_fromnp(np.zeros((11, 11, 3, 64), dtype=np.float32), "w_conv1")
                    self.B_conv1 = weight_variable_fromnp(np.zeros((64), dtype=np.float32), "b_conv1")
                    ##First conv layer is 11x11, 3 input channels into 64 output channels
                    #self.W_conv1 = weight_variable_xavier([11, 11, 3, 64], "w_conv1", conv=True)
                    #self.B_conv1 = bias_variable([64], "b_conv1")
                self.h_conv1 = tf.nn.relu(conv2d(self.inputImage, self.W_conv1, "conv1") + self.B_conv1)
                self.h_norm1 = tf.nn.local_response_normalization(self.h_conv1, name="LRN1")
                #relu commutes with max pooling, so do relu after pool for efficiency
                self.h_pool1 = maxpool_2x2(self.h_norm1, "pool1")

            with tf.name_scope("Conv2Ops"):
                #Second conv layer is 5x5 conv, into 256 output channels
                if(inMatFilename):
                    self.W_conv2 = weight_variable_fromnp(npWeights["conv2_w"], "w_conv2")
                    self.B_conv2 = weight_variable_fromnp(npWeights["conv2_b"], "b_conv2")
                else:
                    self.W_conv2 = weight_variable_fromnp(np.zeros((5, 5, 64, 256), dtype=np.float32), "w_conv2")
                    self.B_conv2 = weight_variable_fromnp(np.zeros((256), dtype=np.float32), "b_conv2")
                    #self.W_conv2 = weight_variable_xavier([5, 5, 64, 256], "w_conv2", conv=True)
                    #self.B_conv2 = bias_variable([256], "b_conv2")
                self.h_conv2 = tf.nn.relu(conv2d(self.h_pool1, self.W_conv2, "conv2") + self.B_conv2)
                self.h_norm2 = tf.nn.local_response_normalization(self.h_conv2, name="LRN2")
                self.h_pool2 = maxpool_2x2(self.h_norm2, "pool2")

            #Third layer is 3x3 conv into 256 output channels
            #No pooling
            with tf.name_scope("Conv3Ops"):
                #Second conv layer is 5x5 conv, into 256 output channels
                if(inMatFilename):
                    self.W_conv3 = weight_variable_fromnp(npWeights["conv3_w"], "w_conv3")
                    self.B_conv3 = weight_variable_fromnp(npWeights["conv3_b"], "b_conv3")
                else:
                    self.W_conv3 = weight_variable_fromnp(np.zeros((3, 3, 256, 256), dtype=np.float32), "w_conv3")
                    self.B_conv3 = weight_variable_fromnp(np.zeros((256), dtype=np.float32), "b_conv3")
                    #self.W_conv3 = weight_variable_xavier([3, 3, 256, 256], "w_conv3", conv=True)
                    #self.B_conv3 = bias_variable([256], "b_conv3")
                self.h_conv3 = tf.nn.relu(conv2d(self.h_pool2, self.W_conv3, "conv3") + self.B_conv3, name="relu3")

            #Fourth layer is 3x3 conv into 256 output channels
            #No pooling
            with tf.name_scope("Conv4Ops"):
                #Second conv layer is 5x5 conv, into 256 output channels
                if(inMatFilename):
                    self.W_conv4 = weight_variable_fromnp(npWeights["conv4_w"], "w_conv4")
                    self.B_conv4 = weight_variable_fromnp(npWeights["conv4_b"], "b_conv4")
                else:
                    self.W_conv4 = weight_variable_fromnp(np.zeros((3, 3, 256, 256), dtype=np.float32), "w_conv4")
                    self.B_conv4 = weight_variable_fromnp(np.zeros((256), dtype=np.float32), "b_conv4")
                    #self.W_conv4 = weight_variable_xavier([3, 3, 256, 256], "w_conv4", conv=True)
                    #self.B_conv4 = bias_variable([256], "b_conv4")
                self.h_conv4 = tf.nn.relu(conv2d(self.h_conv3, self.W_conv4, "conv4") + self.B_conv4, name="relu4")

            #Fifth layer is 3x3 conv into 256 output channels
            #with pooling
            with tf.name_scope("Conv5Ops"):
                #Second conv layer is 5x5 conv, into 256 output channels
                if(inMatFilename):
                    self.W_conv5 = weight_variable_fromnp(npWeights["conv5_w"], "w_conv5")
                    self.B_conv5 = weight_variable_fromnp(npWeights["conv5_b"], "b_conv5")
                else:
                    self.W_conv5 = weight_variable_fromnp(np.zeros((3, 3, 256, 256), dtype=np.float32), "w_conv5")
                    self.B_conv5 = weight_variable_fromnp(np.zeros((256), dtype = np.float32), "b_conv5")
                    #self.W_conv5 = weight_variable_xavier([3, 3, 256, 256], "w_conv5", conv=True)
                    #self.B_conv5 = bias_variable([256], "b_conv5")
                self.h_conv5 = tf.nn.relu(conv2d(self.h_conv4, self.W_conv5, "conv5") + self.B_conv5)
                self.h_norm5 = tf.nn.local_response_normalization(self.h_conv5, name="LRN5")
                self.h_pool5 = maxpool_2x2(self.h_norm5, "pool5")

            #6th layer (not in paper) is 3x3 conv into 256 output channels
            #with pooling
            with tf.name_scope("Conv6Ops"):
                self.W_conv6 = weight_variable_xavier([3, 3, 256, 256], "w_conv6", conv=True)
                self.B_conv6 = bias_variable([256], "b_conv6")
                self.h_conv6 = conv2d(self.h_pool5, self.W_conv6, "conv6") + self.B_conv6
                self.h_pool6 = tf.nn.relu(maxpool_2x2(self.h_conv6, "pool6"), name="relu6")

            self.keep_prob = tf.placeholder(tf.float32)

            #Next is 3 fully connected layers
            #We should have downsampled by 16 at this point (four 2x2 pools)
            #fc1 should have 4096 channels
            numInputs = (inputShape[0]/16) * (inputShape[1]/16) * 256
            with tf.name_scope("FC1"):
                self.W_fc1 = weight_variable([numInputs, 2048], "w_fc1", 1e-6)
                self.B_fc1 = bias_variable([2048], "b_fc1")
                h_pool6_flat = tf.reshape(self.h_pool6, [-1, numInputs], name="pool6_flat")
                self.h_fc1 = tf.nn.relu(tf.matmul(h_pool6_flat, self.W_fc1, name="fc1") + self.B_fc1, "fc1_relu")
                self.h_fc1_drop = tf.nn.dropout(self.h_fc1, self.keep_prob)

        #Put all opt layers on cpu
        with tf.device('/cpu:0'):

            #fc2 should have 128 channels
            with tf.name_scope("FC2"):
                self.W_fc2 = weight_variable_xavier([2048, 128], "w_fc2", conv=False)
                self.B_fc2 = bias_variable([128], "b_fc2")
                self.h_fc2 = tf.nn.relu(tf.matmul(self.h_fc1_drop, self.W_fc2, name="fc2") + self.B_fc2, "fc2_relu")
                self.h_fc2_drop = tf.nn.dropout(self.h_fc2, self.keep_prob)

            #fc3 has 16 channels
            #The original uses a sigmoid here; we use tanh instead
            with tf.name_scope("FC3"):
                self.W_fc3 = weight_variable_xavier([128, 16], "w_fc3", conv=False)
                self.B_fc3 = bias_variable([16], "b_fc3")
                self.h_fc3 = tf.tanh(tf.matmul(self.h_fc2_drop, self.W_fc3, name="fc3") + self.B_fc3, "fc3_tanh")


            #Finally, fc4 condenses into 1 output value
            with tf.name_scope("FC4"):
                self.W_fc4 = weight_variable_xavier([16, 1], "w_fc4", conv=False)
                self.B_fc4 = bias_variable([1], "b_fc4")
                self.est = tf.matmul(self.h_fc3, self.W_fc4, name="est") + self.B_fc4

            with tf.name_scope("Loss"):
                #Define loss
                self.loss = tf.reduce_mean(tf.square(self.gt - self.est))/2

            with tf.name_scope("Opt"):
                #Define optimizer
                #self.optimizerAll = tf.train.AdagradOptimizer(self.learningRate).minimize(self.loss)
                #self.optimizerFC = tf.train.AdagradOptimizer(self.learningRate).minimize(self.loss,
                self.optimizerAll = tf.train.AdamOptimizer(self.learningRate).minimize(self.loss)
                self.optimizerFC = tf.train.AdamOptimizer(self.learningRate).minimize(self.loss,
                        var_list=[self.W_conv6,
                            self.B_conv6,
                            self.W_fc1,
                            self.B_fc1,
                            self.W_fc2,
                            self.B_fc2,
                            self.W_fc3,
                            self.B_fc3,
                            self.W_fc4,
                            self.B_fc4]
                        )

        #Summaries
        tf.scalar_summary('l2 loss', self.loss)
        tf.histogram_summary('input', self.inputImage)
        tf.histogram_summary('gt', self.gt)
        tf.histogram_summary('conv1', self.h_pool1)
        tf.histogram_summary('conv2', self.h_pool2)
        tf.histogram_summary('conv3', self.h_conv3)
        tf.histogram_summary('conv4', self.h_conv4)
        tf.histogram_summary('conv5', self.h_pool5)
        tf.histogram_summary('conv6', self.h_pool6)
        tf.histogram_summary('fc1', self.h_fc1)
        tf.histogram_summary('fc2', self.h_fc2)
        tf.histogram_summary('fc3', self.h_fc3)
        tf.histogram_summary('est', self.est)
        tf.histogram_summary('w_conv1', self.W_conv1)
        tf.histogram_summary('b_conv1', self.B_conv1)
        tf.histogram_summary('w_conv2', self.W_conv2)
        tf.histogram_summary('b_conv2', self.B_conv2)
        tf.histogram_summary('w_conv3', self.W_conv3)
        tf.histogram_summary('b_conv3', self.B_conv3)
        tf.histogram_summary('w_conv4', self.W_conv4)
        tf.histogram_summary('b_conv4', self.B_conv4)
        tf.histogram_summary('w_conv5', self.W_conv5)
        tf.histogram_summary('b_conv5', self.B_conv5)
        tf.histogram_summary('w_conv6', self.W_conv6)
        tf.histogram_summary('b_conv6', self.B_conv6)
        tf.histogram_summary('w_fc1', self.W_fc1)
        tf.histogram_summary('b_fc1', self.B_fc1)
        tf.histogram_summary('w_fc2', self.W_fc2)
        tf.histogram_summary('b_fc2', self.B_fc2)
        tf.histogram_summary('w_fc3', self.W_fc3)
        tf.histogram_summary('b_fc3', self.B_fc3)
        tf.histogram_summary('w_fc4', self.W_fc4)
        tf.histogram_summary('b_fc4', self.B_fc4)

        #Define saver
        self.saver = tf.train.Saver()
	#train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)

	correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
	accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
	
	sess = tf.Session()
	
	#sess = tf.InteractiveSession()
	init = tf.global_variables_initializer()
	sess.run(init)
	
	#tf.contrib.deprecated.histogram_summary("Accuracy:", accuracy)
	
	#tf.summary.scalar_summary("Accuracy:", correct_prediction)
	tf.scalar_summary("Accuracy:", accuracy)
	tf.histogram_summary('weights', W)
	tf.histogram_summary('bias', b)
	tf.histogram_summary('softmax', tf_softmax)
	tf.histogram_summary('accuracy', accuracy)

	
	merged_summary = tf.summary.merge_all()
	
	writer = tf.summary.FileWriter('./graphs', sess.graph)
	

	
	#tf.summary.histogram_summary('softmax', y)
	
	
	#number of iterations
Example #36
    logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    y_conv = tf.nn.softmax(logits)

with tf.name_scope("cross_entropy") as scope:
    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, Y))
    ce_summ = tf.scalar_summary("cross_entropy", cross_entropy)

with tf.name_scope("train") as scope:
    train = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)

with tf.name_scope("accuracy") as scope:
    correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(y_conv, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    accuracy_summ = tf.scalar_summary("accuracy", accuracy)

# Add histogram
W_conv1_hist = tf.histogram_summary("wegiht1", W_conv1)
W_conv2_hist = tf.histogram_summary("weight2", W_conv2)
W_fc1_hist = tf.histogram_summary("weigth3", W_fc1)
W_fc2_hist = tf.histogram_summary("weight4", W_fc2)
b_conv1_hist = tf.histogram_summary("bias1", b_conv1)
b_conv2_hist = tf.histogram_summary("bias2", b_conv2)
b_fc1_hist = tf.histogram_summary("bias3", b_fc1)
b_fc2_hist = tf.histogram_summary("bias4", b_fc2)

with tf.Session() as sess:
    tf.initialize_all_variables().run()
    merged = tf.merge_all_summaries()
    writer = tf.train.SummaryWriter("/tmp/CNN_logs", sess.graph)
    for i in range(1001):
        batch_xs, batch_ys = rotor.train.next_batch(batch_size)
        sess.run(train, feed_dict={X: batch_xs, Y: batch_ys, keep_prob: 0.5})
Example #37
    def _create_dilation_layer(self, input_batch, layer_index, dilation,
                               global_condition_batch, output_width):
        '''Creates a single causal dilated convolution layer.

        Args:
             input_batch: Input to the dilation layer.
             layer_index: Integer indicating which layer this is.
             dilation: Integer specifying the dilation size.
             global_condition_batch: Tensor containing the global data upon
                 which the output is to be conditioned. Shape:
                 [batch size, 1, channels]. The 1 is for the axis
                 corresponding to time so that the result is broadcast to
                 all time steps.
             output_width: Width (number of time steps) of the skip output.

        The layer contains a gated filter that connects to dense output
        and to a skip connection:

               |-> [gate]   -|        |-> 1x1 conv -> skip output
               |             |-> (*) -|
        input -|-> [filter] -|        |-> 1x1 conv -|
               |                                    |-> (+) -> dense output
               |------------------------------------|

        Where `[gate]` and `[filter]` are causal convolutions with a
        non-linear activation at the output. Biases and global conditioning
        are omitted due to the limits of ASCII art.

        '''
        variables = self.variables['dilated_stack'][layer_index]

        weights_filter = variables['filter']
        weights_gate = variables['gate']

        conv_filter = causal_conv(input_batch, weights_filter, dilation)
        conv_gate = causal_conv(input_batch, weights_gate, dilation)

        if global_condition_batch is not None:
            weights_gc_filter = variables['gc_filtweights']
            conv_filter = conv_filter + tf.nn.conv1d(global_condition_batch,
                                                     weights_gc_filter,
                                                     stride=1,
                                                     padding="SAME",
                                                     name="gc_filter")
            weights_gc_gate = variables['gc_gateweights']
            conv_gate = conv_gate + tf.nn.conv1d(global_condition_batch,
                                                 weights_gc_gate,
                                                 stride=1,
                                                 padding="SAME",
                                                 name="gc_gate")

        if self.use_biases:
            filter_bias = variables['filter_bias']
            gate_bias = variables['gate_bias']
            conv_filter = tf.add(conv_filter, filter_bias)
            conv_gate = tf.add(conv_gate, gate_bias)

        out = tf.tanh(conv_filter) * tf.sigmoid(conv_gate)

        # The 1x1 conv to produce the residual output
        weights_dense = variables['dense']
        transformed = tf.nn.conv1d(out,
                                   weights_dense,
                                   stride=1,
                                   padding="SAME",
                                   name="dense")

        # The 1x1 conv to produce the skip output
        skip_cut = tf.shape(out)[1] - output_width
        out_skip = tf.slice(out, [0, skip_cut, 0], [-1, -1, -1])
        weights_skip = variables['skip']
        skip_contribution = tf.nn.conv1d(out_skip,
                                         weights_skip,
                                         stride=1,
                                         padding="SAME",
                                         name="skip")

        if self.use_biases:
            dense_bias = variables['dense_bias']
            skip_bias = variables['skip_bias']
            transformed = transformed + dense_bias
            skip_contribution = skip_contribution + skip_bias

        if self.histograms:
            layer = 'layer{}'.format(layer_index)
            tf.histogram_summary(layer + '_filter', weights_filter)
            tf.histogram_summary(layer + '_gate', weights_gate)
            tf.histogram_summary(layer + '_dense', weights_dense)
            tf.histogram_summary(layer + '_skip', weights_skip)
            if self.use_biases:
                tf.histogram_summary(layer + '_biases_filter', filter_bias)
                tf.histogram_summary(layer + '_biases_gate', gate_bias)
                tf.histogram_summary(layer + '_biases_dense', dense_bias)
                tf.histogram_summary(layer + '_biases_skip', skip_bias)

        input_cut = tf.shape(input_batch)[1] - tf.shape(transformed)[1]
        input_batch = tf.slice(input_batch, [0, input_cut, 0], [-1, -1, -1])

        return skip_contribution, input_batch + transformed
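
The gated activation in the layer above can also be looked at in isolation. The following is a minimal, hypothetical sketch of the tanh/sigmoid gating that the ASCII diagram describes; it replaces causal_conv and dilation with a plain tf.nn.conv1d, and the placeholder shapes and filter width are made up for illustration.

# Sketch of a gated activation unit: out = tanh(W_f * x) * sigmoid(W_g * x).
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 100, 32])         # [batch, time, channels]
w_filter = tf.Variable(tf.random_normal([2, 32, 32]))   # [filter_width, in, out]
w_gate = tf.Variable(tf.random_normal([2, 32, 32]))

conv_filter = tf.nn.conv1d(x, w_filter, stride=1, padding="SAME")
conv_gate = tf.nn.conv1d(x, w_gate, stride=1, padding="SAME")
gated = tf.tanh(conv_filter) * tf.sigmoid(conv_gate)    # element-wise gate
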
Example #38
def _add_split_loss(w, input_q, output_q):
    # Check input tensors' dimensions
    assert len(w.get_shape()) == 2 or len(w.get_shape()) == 4
    in_dim, out_dim = w.get_shape().as_list()[-2:]
    assert len(input_q.get_shape()) == 2
    assert len(output_q.get_shape()) == 2
    assert in_dim == input_q.get_shape().as_list()[1]
    assert out_dim == output_q.get_shape().as_list()[1]
    assert input_q.get_shape().as_list()[0] == output_q.get_shape().as_list()[
        0]  # ngroups
    ngroups = input_q.get_shape().as_list()[0]
    assert ngroups > 1

    # Add split losses to collections
    T_list = []
    U_list = []
    if input_q not in tf.get_collection('OVERLAP_LOSS_WEIGHTS'):
        tf.add_to_collection('OVERLAP_LOSS_WEIGHTS', input_q)
        print('\t\tAdd overlap & split loss for %s' % input_q.name)
        for i in range(ngroups):
            for j in range(ngroups):
                if i == j:
                    continue
                T_list.append(tf.reduce_sum(input_q[i, :] * input_q[j, :]))
            U_list.append(tf.square(tf.reduce_sum(input_q[i, :])))
    if output_q not in tf.get_collection('OVERLAP_LOSS_WEIGHTS'):
        print('\t\tAdd overlap & split loss for %s' % output_q.name)
        tf.add_to_collection('OVERLAP_LOSS_WEIGHTS', output_q)
        for i in range(ngroups):
            for j in range(ngroups):
                if i == j:
                    continue
                T_list.append(tf.reduce_sum(output_q[i, :] * output_q[j, :]))
            U_list.append(tf.square(tf.reduce_sum(output_q[i, :])))
    if T_list:
        tf.add_to_collection('OVERLAP_LOSS', tf.add_n(T_list))
    if U_list:
        tf.add_to_collection('UNIFORM_LOSS', tf.add_n(U_list))

    S_list = []
    for i in range(ngroups):
        if len(w.get_shape()) == 4:
            w_reduce = tf.reduce_mean(tf.square(w), [0, 1])
            wg_row = tf.matmul(
                tf.matmul(tf.diag(tf.square(1 - input_q[i, :])), w_reduce),
                tf.diag(tf.square(output_q[i, :])))
            wg_row_l2 = tf.reduce_sum(tf.sqrt(tf.reduce_sum(wg_row, 1)))
            wg_col = tf.matmul(
                tf.matmul(tf.diag(tf.square(input_q[i, :])), w_reduce),
                tf.diag(tf.square(1 - output_q[i, :])))
            wg_col_l2 = tf.reduce_sum(tf.sqrt(tf.reduce_sum(wg_col, 0)))
        else:  # len(w.get_shape()) == 2
            wg_row = tf.matmul(tf.matmul(tf.diag(1 - input_q[i, :]), w),
                               tf.diag(output_q[i, :]))
            wg_row_l2 = tf.reduce_sum(
                tf.sqrt(tf.reduce_sum(wg_row * wg_row, 1)))
            wg_col = tf.matmul(tf.matmul(tf.diag(input_q[i, :]), w),
                               tf.diag(1 - output_q[i, :]))
            wg_col_l2 = tf.reduce_sum(
                tf.sqrt(tf.reduce_sum(wg_col * wg_col, 0)))
        S_list.append(wg_row_l2 + wg_col_l2)
    S = tf.add_n(S_list)
    tf.add_to_collection('WEIGHT_SPLIT', S)

    # Add histogram for w if split losses are added
    scope_name = tf.get_variable_scope().name
    tf.histogram_summary("%s/weights" % scope_name, w)
    print('\t\tAdd split loss for %s(%dx%d, %d groups)' \
          % (tf.get_variable_scope().name, in_dim, out_dim, ngroups))

    return
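
The function above only registers its overlap, uniform, and split terms in graph collections; the caller is presumably responsible for folding them into the training objective. A rough sketch of how that could look; the weighting constants and the base cross_entropy_loss are assumptions, not taken from this code.

# Sketch only: gather the collected regularizers and add them to a base loss.
overlap_terms = tf.get_collection('OVERLAP_LOSS')
uniform_terms = tf.get_collection('UNIFORM_LOSS')
split_terms = tf.get_collection('WEIGHT_SPLIT')

reg_loss = tf.constant(0.0)
if overlap_terms:
    reg_loss += 1e-3 * tf.add_n(overlap_terms)   # made-up weight
if uniform_terms:
    reg_loss += 1e-4 * tf.add_n(uniform_terms)   # made-up weight
if split_terms:
    reg_loss += 1e-3 * tf.add_n(split_terms)     # made-up weight

total_loss = cross_entropy_loss + reg_loss       # cross_entropy_loss assumed defined elsewhere
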
Example #39
    def _build_model(self):
        """
        Builds the Tensorflow graph.
        """

        # Placeholders for our input
        # 80x80 input matrix
        self.X_pl = tf.placeholder(shape=[None, 80, 80, 4],
                                   dtype=tf.uint8,
                                   name="X")
        # The TD target value
        self.y_pl = tf.placeholder(shape=[None], dtype=tf.float32, name="y")
        # Integer id of which action was selected
        self.actions_pl = tf.placeholder(shape=[None],
                                         dtype=tf.int32,
                                         name="actions")

        X = tf.to_float(self.X_pl)
        batch_size = tf.shape(self.X_pl)[0]

        # Three convolutional layers
        conv1 = tf.contrib.layers.conv2d(X, 32, 8, 4, activation_fn=tf.nn.relu)
        conv2 = tf.contrib.layers.conv2d(conv1,
                                         64,
                                         4,
                                         2,
                                         activation_fn=tf.nn.relu)
        conv3 = tf.contrib.layers.conv2d(conv2,
                                         64,
                                         3,
                                         1,
                                         activation_fn=tf.nn.relu)

        # Fully connected layers
        flattened = tf.contrib.layers.flatten(conv3)
        fc1 = tf.contrib.layers.fully_connected(flattened, 512)
        self.predictions = tf.contrib.layers.fully_connected(
            fc1, len(VALID_ACTIONS))

        # Get the predictions for the chosen actions only
        gather_indices = tf.range(batch_size) * tf.shape(
            self.predictions)[1] + self.actions_pl
        self.action_predictions = tf.gather(tf.reshape(self.predictions, [-1]),
                                            gather_indices)

        # Calculate the loss
        self.losses = tf.squared_difference(self.y_pl, self.action_predictions)
        self.loss = tf.reduce_mean(self.losses)

        # Optimizer Parameters from original paper
        self.optimizer = tf.train.RMSPropOptimizer(0.00025, 0.99, 0.0, 1e-6)
        self.train_op = self.optimizer.minimize(
            self.loss, global_step=tf.contrib.framework.get_global_step())

        # Summaries for Tensorboard
        self.summaries = tf.merge_summary([
            tf.scalar_summary("loss", self.loss),
            tf.histogram_summary("loss_hist", self.losses),
            tf.histogram_summary("q_values_hist", self.predictions),
            tf.scalar_summary("max_q_value", tf.reduce_max(self.predictions))
        ])
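
The gather_indices arithmetic above picks predictions[i, actions[i]] for each row by flattening the [batch, num_actions] matrix. A tiny self-contained sketch of the same indexing, with invented numbers:

# Sketch: select one prediction per row via a flat gather.
predictions = tf.constant([[0.1, 0.9, 0.0],
                           [0.5, 0.2, 0.3]])            # [batch=2, num_actions=3]
actions = tf.constant([1, 0])                           # chosen action per row
flat_indices = tf.range(tf.shape(predictions)[0]) * tf.shape(predictions)[1] + actions
chosen = tf.gather(tf.reshape(predictions, [-1]), flat_indices)
# chosen evaluates to [0.9, 0.5]
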
Example #40
    # Reshape word vector
    word_vec_reshape = tf.reshape(word_vec,
                                  [batch_size, input_size, embed_size])

    # LSTM(BiDirectional)
    BLSTM, each_output, sqlength = _bidirectionalLSTM(word_vec_reshape)

    # Dropout(Outputs)
    keep_prob_out = tf.placeholder(tf.float32)
    output_drop = tf.nn.dropout(BLSTM, keep_prob_out)

    # Weight & bias
    W = tf.Variable(tf.random_uniform([bi_hidden_size, tag_size], -1.0, 1.0))
    b = tf.Variable(tf.random_uniform([tag_size], -1.0, 1.0))
    W_hist = tf.histogram_summary("weights", W)
    b_hist = tf.histogram_summary("biases", b)

    # True Data
    y_ = tf.placeholder(tf.int32, [batch_size, input_size])

    # Softmax
    # y = tf.nn.softmax(tf.matmul(output_drop, W) + b)
    y = tf.matmul(output_drop, W)
    yreshape = tf.reshape(y, [batch_size, input_size, tag_size])

    # CRF
    crf_sqlengths = np.full(batch_size, input_size, dtype=np.int32)
    tf_crf_sqlengths = tf.constant(crf_sqlengths)
    crf_score, transition_params = _CRF(yreshape, y_, tf_crf_sqlengths)
Example #41
def add_activation_summary(var):
    tf.histogram_summary(var.op.name + "/activation", var)
    tf.scalar_summary(var.op.name + "/sparsity", tf.nn.zero_fraction(var))
                          padding='SAME')


x = tf.placeholder(tf.float32, shape=[None, 240, 320, 3])
y_ = tf.placeholder(tf.float32, shape=[None, 3])

W_conv1 = weight_variable([6, 6, 3, 16])
b_conv1 = bias_variable([16])
h_conv1 = tf.nn.relu(conv2d(x, W_conv1) + b_conv1)
dead_ReLUs1 = tf.placeholder(tf.float32, shape=[1])
h_pool1 = max_pool_2x2(h_conv1)

W_conv2 = weight_variable([6, 6, 16, 4])
b_conv2 = bias_variable([4])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
tf.histogram_summary('activations_layer_2', h_conv2)
h_pool2 = max_pool_2x2(h_conv2)

W_conv3 = weight_variable([6, 6, 4, 4])
b_conv3 = bias_variable([4])
h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3) + b_conv3)
tf.histogram_summary('activations_layer_3', h_conv3)
h_pool3 = max_pool_2x2(h_conv3)

W_conv4 = weight_variable([6, 6, 4, 4])
b_conv4 = bias_variable([4])
h_conv4 = tf.nn.relu(conv2d(h_pool3, W_conv4) + b_conv4)
tf.histogram_summary('activations_layer_4', h_conv4)
h_pool4 = max_pool_2x2(h_conv4)

W_fc1 = weight_variable([15 * 20 * 4, 4])
Example #43
def _activation_summary(x, name_tensor=None):
    """Helper to create summaries for activations."""
    if name_tensor is None:
        name_tensor = x.op.name
    tf.histogram_summary(name_tensor + '/activations', x)
    tf.scalar_summary(name_tensor + '/sparsity', tf.nn.zero_fraction(x))
Example #44
def add_gradient_summary(grad, var):
    if grad is not None:
        tf.histogram_summary(var.op.name + "/gradient", grad)
Example #45
    "float", [None, 784])  # mnist data image of shape 28*28=784
output_tensor = tf.placeholder(
    "float", [None, 10])  # 0-9 digits recognition => 10 classes

# Create a model

# Set model weights
weights = tf.Variable(tf.zeros([784, 10]))
biases = tf.Variable(tf.zeros([10]))

with tf.name_scope("Wx_b") as scope:
    # Construct a linear model
    model = tf.nn.softmax(tf.matmul(input_tensor, weights) + biases)  # Softmax

# Add summary ops to collect data
weights_summary = tf.histogram_summary("weights", weights)
biases_summary = tf.histogram_summary("biases", biases)

# More name scopes will clean up graph representation
with tf.name_scope("cost_function") as scope:
    # Minimize error using cross entropy
    # Cross entropy
    cost_function = -tf.reduce_sum(output_tensor * tf.log(model))
    # Create a summary to monitor the cost function
    tf.scalar_summary("cost_function", cost_function)

# Training initialisation

with tf.name_scope("train") as scope:
    # Creates an optimization function that makes our model improve during training
    # Gradient descent
Example #46
def add_to_regularization_and_summary(var):
    if var is not None:
        tf.histogram_summary(var.op.name, var)
        tf.add_to_collection("reg_loss", tf.nn.l2_loss(var))
Example #47
    def buildModel(self, inputShape):
        assert (self.nT % self.VStrideT == 0)
        assert (inputShape[0] % self.VStrideY == 0)
        assert (inputShape[1] % self.VStrideX == 0)
        V_T = int(self.nT / self.VStrideT)
        V_Y = int(inputShape[0] / self.VStrideY)
        V_X = int(inputShape[1] / self.VStrideX)
        V_Tp = int(self.patchSizeT / self.VStrideT)
        V_Yp = int(self.patchSizeY / self.VStrideY)
        V_Xp = int(self.patchSizeX / self.VStrideX)
        V_Ofp = int(inputShape[2] * self.VStrideT * self.VStrideY *
                    self.VStrideX)

        self.imageShape = (self.batchSize, self.nT, inputShape[0],
                           inputShape[1], inputShape[2])
        self.WShape = (V_Tp, V_Yp, V_Xp, self.numV, V_Ofp)
        self.VShape = (self.batchSize, V_T, V_Y, V_X, self.numV)

        #Running on GPU
        with tf.device(self.device):
            with tf.name_scope("inputOps"):
                #Get convolution variables as placeholders
                self.inputImage = node_variable(self.imageShape, "inputImage")
                #Scale inputImage
                self.scaled_inputImage = self.inputImage / np.sqrt(
                    self.patchSizeX * self.patchSizeY * inputShape[2])
                #This is what it should be, but for now, we ignore the scaling with nT
                #self.scaled_inputImage = self.inputImage/np.sqrt(self.nT*self.patchSizeX*self.patchSizeY*inputShape[2])
                self.reshape_inputImage = transpose5dData(
                    self.scaled_inputImage, self.imageShape, self.VStrideT,
                    self.VStrideY, self.VStrideX)

            with tf.name_scope("Dictionary"):
                self.V1_W = sparse_weight_variable(self.WShape, "V1_W")
                #self.V1_W = sparse_weight_variable((self.patchSizeY, self.patchSizeX, inputShape[2], self.numV), "V1_W")

            with tf.name_scope("weightNorm"):
                self.normVals = tf.sqrt(
                    tf.reduce_sum(tf.square(self.V1_W),
                                  reduction_indices=[0, 1, 2, 4],
                                  keep_dims=True))
                #self.normVals = tf.sqrt(tf.reduce_sum(tf.square(self.V1_W), reduction_indices=[0, 1, 2], keep_dims=True))
                self.normalize_W = self.V1_W.assign(self.V1_W / self.normVals)

            with tf.name_scope("ISTA"):
                #Variable for activity
                self.V1_A = weight_variable(self.VShape, "V1_A", 1e-4)
                self.zeroConst = tf.zeros(self.VShape)
                self.t_V1_A = tf.select(
                    tf.abs(self.V1_A) < self.zeroThresh, self.zeroConst,
                    self.V1_A)

                #self.V1_A= weight_variable((self.batchSize, inputShape[0], inputShape[1], self.numV), "V1_A", .01)

            with tf.name_scope("Recon"):
                assert (self.VStrideT >= 1)
                assert (self.VStrideY >= 1)
                assert (self.VStrideX >= 1)
                #We build index tensor in numpy to gather
                self.recon = conv3d(self.V1_A, self.V1_W, "recon")
                self.t_recon = conv3d(self.t_V1_A, self.V1_W, "recon")

            with tf.name_scope("Error"):
                self.error = self.reshape_inputImage - self.recon
                self.t_error = self.reshape_inputImage - self.t_recon

            with tf.name_scope("Loss"):
                self.reconError = tf.reduce_sum(tf.square(self.error))
                self.l1Sparsity = tf.reduce_sum(tf.abs(self.V1_A))
                #Define loss
                self.loss = self.reconError / 2 + self.thresh * self.l1Sparsity

                self.t_reconError = tf.reduce_sum(tf.square(self.t_error))
                self.t_l1Sparsity = tf.reduce_sum(tf.abs(self.t_V1_A))
                #Define loss
                self.t_loss = self.t_reconError / 2 + self.thresh * self.t_l1Sparsity

            with tf.name_scope("Opt"):
                #Define optimizer
                #self.optimizerA = tf.train.GradientDescentOptimizer(self.learningRateA).minimize(self.loss,
                self.optimizerA = tf.train.AdamOptimizer(
                    self.learningRateA).minimize(self.loss,
                                                 var_list=[self.V1_A])
                #self.optimizerW = tf.train.GradientDescentOptimizer(self.learningRateW).minimize(self.loss,
                self.optimizerW = tf.train.AdamOptimizer(
                    self.learningRateW).minimize(self.loss,
                                                 var_list=[self.V1_W])

            with tf.name_scope("stats"):
                self.underThresh = tf.reduce_mean(
                    tf.cast(tf.abs(self.V1_A) > self.zeroThresh, tf.float32))

                self.errorStd = tf.sqrt(
                    tf.reduce_mean(
                        tf.square(self.error - tf.reduce_mean(self.error)))
                ) * np.sqrt(self.patchSizeY * self.patchSizeX * inputShape[2])
                self.l1_mean = tf.reduce_mean(tf.abs(self.V1_A))

                self.t_errorStd = tf.sqrt(
                    tf.reduce_mean(
                        tf.square(self.t_error - tf.reduce_mean(self.t_error)))
                ) * np.sqrt(self.patchSizeY * self.patchSizeX * inputShape[2])
                self.t_l1_mean = tf.reduce_mean(tf.abs(self.t_V1_A))

                #Reshape weights for viewing
                self.reshape_weight = transpose5dWeight(
                    self.V1_W, self.WShape, self.VStrideT, self.VStrideY,
                    self.VStrideX)
                self.weightImages = tf.reshape(
                    tf.transpose(self.reshape_weight, [3, 0, 1, 2, 4]), [
                        self.numV * self.patchSizeT, self.patchSizeY,
                        self.patchSizeX, inputShape[2]
                    ])
                #For image viewing
                self.frameImages = self.inputImage[0, :, :, :, :]
                self.reshaped_recon = undoTranspose5dData(
                    self.recon, self.imageShape, self.VStrideT, self.VStrideY,
                    self.VStrideX)
                self.frameRecons = self.reshaped_recon[0, :, :, :, :]

                self.t_reshaped_recon = undoTranspose5dData(
                    self.t_recon, self.imageShape, self.VStrideT,
                    self.VStrideY, self.VStrideX)
                self.t_frameRecons = self.t_reshaped_recon[0, :, :, :, :]
                #For log of activities
                self.log_V1_A = tf.log(tf.abs(self.V1_A) + 1e-15)

        #Summaries
        self.s_loss = tf.scalar_summary('loss', self.loss, name="lossSum")
        self.s_recon = tf.scalar_summary('recon error',
                                         self.reconError,
                                         name="reconError")
        self.s_errorStd = tf.scalar_summary('errorStd',
                                            self.errorStd,
                                            name="errorStd")
        self.s_l1 = tf.scalar_summary('l1 sparsity',
                                      self.l1Sparsity,
                                      name="l1Sparsity")
        self.s_l1_mean = tf.scalar_summary('l1 mean',
                                           self.l1_mean,
                                           name="l1Mean")
        self.s_s_nnz = tf.scalar_summary('nnz', self.underThresh, name="nnz")

        self.s_t_loss = tf.scalar_summary('t loss',
                                          self.t_loss,
                                          name="t_lossSum")
        self.s_t_recon = tf.scalar_summary('t recon error',
                                           self.t_reconError,
                                           name="t_reconError")
        self.s_t_errorStd = tf.scalar_summary('t errorStd',
                                              self.t_errorStd,
                                              name="t_errorStd")
        self.s_t_l1 = tf.scalar_summary('t l1 sparsity',
                                        self.t_l1Sparsity,
                                        name="t_l1Sparsity")
        self.s_t_l1_mean = tf.scalar_summary('t l1 mean',
                                             self.t_l1_mean,
                                             name="t_l1Mean")

        self.h_input = tf.histogram_summary('input',
                                            self.inputImage,
                                            name="input")
        self.h_recon = tf.histogram_summary('recon', self.recon, name="recon")
        self.h_v1_w = tf.histogram_summary('V1_W', self.V1_W, name="V1_W")

        self.h_v1_a = tf.histogram_summary('V1_A', self.V1_A, name="V1_A")
        self.h_log_v1_a = tf.histogram_summary('Log_V1_A',
                                               self.log_V1_A,
                                               name="Log_V1_A")

        self.h_normVals = tf.histogram_summary('normVals',
                                               self.normVals,
                                               name="normVals")
Example #48
  def train(self, states, actions, rewards, initial, **unused):
    n = self.rlConfig.tdN
    
    state_shape = tf.shape(states)
    batch_size = state_shape[0]
    experience_length = state_shape[1]

    train_length = experience_length - n
    
    # if not natural
    q_outputs, q_hidden = tf.nn.dynamic_rnn(self.q_rnn, states, initial_state=initial)
    
    predictedQs = self.q_out(q_outputs)
    takenQs = tfl.batch_dot(actions, predictedQs)
    trainQs = tf.slice(takenQs, [0, 0], [-1, train_length])
    
    # smooth between TD(m) for m<=n?
    targets = tf.slice(takenQs, [0, n], [-1, train_length])
    #targets = values[:,n:]
    for i in reversed(range(n)):
      targets *= self.rlConfig.discount
      targets += tf.slice(rewards, [0, i], [-1, train_length])
    targets = tf.stop_gradient(targets)
    
    """ TODO: do we still want this code path for maxQ/sarsa?
    targetQs = predictedQs
    realQs = tfl.batch_dot(actions, targetQs)
    maxQs = tf.reduce_max(targetQs, -1)
    targetQs = realQs if self.sarsa else maxQs
    
    tf.scalar_summary("q_max", tf.reduce_mean(maxQs))
    
    # smooth between TD(m) for m<=n?
    targets = tf.slice(targetQs, [0, n], [-1, train_length])
    for i in reversed(range(n)):
      targets = tf.slice(rewards, [0, i], [-1, train_length]) + self.rlConfig.discount * targets
    targets = tf.stop_gradient(targets)
    """
    
    qLoss = tf.reduce_mean(tf.squared_difference(trainQs, targets))
    tf.scalar_summary("q_loss", qLoss)
    tf.scalar_summary("q_uev", qLoss / tfl.sample_variance(targets))
    
    # all this just to log entropy statistics
    flatQs = tf.reshape(predictedQs, [-1, self.action_size])
    action_probs = tf.nn.softmax(flatQs / self.temperature)
    action_probs = (1.0 - self.epsilon) * action_probs + self.epsilon / self.action_size
    log_action_probs = tf.log(action_probs)
    entropy = -tfl.batch_dot(action_probs, log_action_probs)
    tf.scalar_summary("entropy_avg", tf.reduce_mean(entropy))
    tf.histogram_summary("entropy", entropy)
    
    meanQs = tfl.batch_dot(action_probs, flatQs)
    tf.scalar_summary("q_mean", tf.reduce_mean(meanQs))
    
    params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='q')
    
    def metric(q1, q2):
      return tf.reduce_mean(tf.squared_difference(q1, q2))

    trainQ = self.optimizer.optimize(qLoss, params, predictedQs, metric)
    return trainQ
    
    """
Example #49
  def build_dqn(self):
    self.w = {}
    self.t_w = {}

    #initializer = tf.contrib.layers.xavier_initializer()
    initializer = tf.truncated_normal_initializer(0, 0.02)
    activation_fn = tf.nn.relu

    # training network
    with tf.variable_scope('prediction'):
      if self.cnn_format == 'NHWC':
        self.s_t = tf.placeholder('float32',
            [None, self.screen_width, self.screen_height, self.history_length], name='s_t')
      else:
        self.s_t = tf.placeholder('float32',
            [None, self.history_length, self.screen_width, self.screen_height], name='s_t')

      self.l1, self.w['l1_w'], self.w['l1_b'] = conv2d(self.s_t,
          32, [8, 8], [4, 4], initializer, activation_fn, self.cnn_format, name='l1')
      self.l2, self.w['l2_w'], self.w['l2_b'] = conv2d(self.l1,
          64, [4, 4], [2, 2], initializer, activation_fn, self.cnn_format, name='l2')
      self.l3, self.w['l3_w'], self.w['l3_b'] = conv2d(self.l2,
          64, [3, 3], [1, 1], initializer, activation_fn, self.cnn_format, name='l3')

      shape = self.l3.get_shape().as_list()
      self.l3_flat = tf.reshape(self.l3, [-1, reduce(lambda x, y: x * y, shape[1:])])

      self.l4, self.w['l4_w'], self.w['l4_b'] = linear(self.l3_flat, 512, activation_fn=activation_fn, name='l4')
      self.q, self.w['q_w'], self.w['q_b'] = linear(self.l4, self.env.action_size, name='q')
      self.q_action = tf.argmax(self.q, dimension=1)

      q_summary = []
      avg_q = tf.reduce_mean(self.q, 0)
      for idx in xrange(self.env.action_size):
        q_summary.append(tf.histogram_summary('q/%s' % idx, avg_q[idx]))
      self.q_summary = tf.merge_summary(q_summary, 'q_summary')

    # target network
    with tf.variable_scope('target'):
      if self.cnn_format == 'NHWC':
        self.target_s_t = tf.placeholder('float32', 
            [None, self.screen_width, self.screen_height, self.history_length], name='target_s_t')
      else:
        self.target_s_t = tf.placeholder('float32', 
            [None, self.history_length, self.screen_width, self.screen_height], name='target_s_t')

      self.target_l1, self.t_w['l1_w'], self.t_w['l1_b'] = conv2d(self.target_s_t, 
          32, [8, 8], [4, 4], initializer, activation_fn, self.cnn_format, name='target_l1')
      self.target_l2, self.t_w['l2_w'], self.t_w['l2_b'] = conv2d(self.target_l1,
          64, [4, 4], [2, 2], initializer, activation_fn, self.cnn_format, name='target_l2')
      self.target_l3, self.t_w['l3_w'], self.t_w['l3_b'] = conv2d(self.target_l2,
          64, [3, 3], [1, 1], initializer, activation_fn, self.cnn_format, name='target_l3')

      shape = self.target_l3.get_shape().as_list()
      self.target_l3_flat = tf.reshape(self.target_l3, [-1, reduce(lambda x, y: x * y, shape[1:])])

      self.target_l4, self.t_w['l4_w'], self.t_w['l4_b'] = \
          linear(self.target_l3_flat, 512, activation_fn=activation_fn, name='target_l4')
      self.target_q, self.t_w['q_w'], self.t_w['q_b'] = \
          linear(self.target_l4, self.env.action_size, name='target_q')

    with tf.variable_scope('pred_to_target'):
      self.t_w_input = {}
      self.t_w_assign_op = {}

      for name in self.w.keys():
        self.t_w_input[name] = tf.placeholder('float32', self.t_w[name].get_shape().as_list(), name=name)
        self.t_w_assign_op[name] = self.t_w[name].assign(self.t_w_input[name])

    # optimizer
    with tf.variable_scope('optimizer'):
      self.target_q_t = tf.placeholder('float32', [None], name='target_q_t')
      self.action = tf.placeholder('int64', [None], name='action')

      action_one_hot = tf.one_hot(self.action, self.env.action_size, 1.0, 0.0, name='action_one_hot')
      q_acted = tf.reduce_sum(self.q * action_one_hot, reduction_indices=1, name='q_acted')

      self.delta = self.target_q_t - q_acted
      self.clipped_delta = tf.clip_by_value(self.delta, self.min_delta, self.max_delta, name='clipped_delta')

      self.loss = tf.reduce_mean(tf.square(self.clipped_delta), name='loss')
      self.optim = tf.train.RMSPropOptimizer(self.learning_rate, momentum=0.95, epsilon=0.01).minimize(self.loss)

    with tf.variable_scope('summary'):
      scalar_summary_tags = ['average/reward', 'average/loss', 'average/q', \
          'episode/max reward', 'episode/min reward', 'episode/avg reward', 'episode/num of game']

      self.summary_placeholders = {}
      self.summary_ops = {}

      for tag in scalar_summary_tags:
        self.summary_placeholders[tag] = tf.placeholder('float32', None, name=tag.replace(' ', '_'))
        self.summary_ops[tag]  = tf.scalar_summary(tag, self.summary_placeholders[tag])

      histogram_summary_tags = ['episode/rewards', 'episode/actions']

      for tag in histogram_summary_tags:
        self.summary_placeholders[tag] = tf.placeholder('float32', None, name=tag.replace(' ', '_'))
        self.summary_ops[tag]  = tf.histogram_summary(tag, self.summary_placeholders[tag])

      self.writer = tf.train.SummaryWriter('./logs/%s' % self.model_dir, self.sess.graph)

    tf.initialize_all_variables().run()

    self._saver = tf.train.Saver(self.w.values() + [self.step_op], max_to_keep=30)

    self.load_model()
    self.update_target_q_network()
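
The pred_to_target scope above builds a placeholder/assign pair for every weight so the target network can be refreshed from the prediction network; update_target_q_network presumably just runs those assign ops. A hedged sketch of what that refresh might look like; only the attribute names come from the snippet, the loop body is an assumption.

# Sketch: copy each prediction-network weight into the target network.
def update_target_q_network(self):
    for name in self.w.keys():
        value = self.sess.run(self.w[name])
        self.sess.run(self.t_w_assign_op[name],
                      feed_dict={self.t_w_input[name]: value})
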
def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor):
    """Adds a new softmax and fully-connected layer for training.

  We need to retrain the top layer to identify our new classes, so this function
  adds the right operations to the graph, along with some variables to hold the
  weights, and then sets up all the gradients for the backward pass.

  The set up for the softmax and fully-connected layers is based on:
  https://tensorflow.org/versions/master/tutorials/mnist/beginners/index.html

  Args:
    class_count: Integer of how many categories of things we're trying to
    recognize.
    final_tensor_name: Name string for the new final node that produces results.
    bottleneck_tensor: The output of the main CNN graph.

  Returns:
    The tensors for the training and cross entropy results, and tensors for the
    bottleneck input and ground truth input.
  """
    with tf.name_scope('input'):
        bottleneck_input = tf.placeholder_with_default(
            bottleneck_tensor,
            shape=[None, BOTTLENECK_TENSOR_SIZE],
            name='BottleneckInputPlaceholder')

        ground_truth_input = tf.placeholder(tf.float32, [None, class_count],
                                            name='GroundTruthInput')

    # Organizing the following ops as `final_training_ops` so they're easier
    # to see in TensorBoard
    layer_name = 'final_training_ops'
    with tf.name_scope(layer_name):
        with tf.name_scope('weights'):
            layer_weights = tf.Variable(tf.truncated_normal(
                [BOTTLENECK_TENSOR_SIZE, class_count], stddev=0.001),
                                        name='final_weights')
            variable_summaries(layer_weights, layer_name + '/weights')
        with tf.name_scope('biases'):
            layer_biases = tf.Variable(tf.zeros([class_count]),
                                       name='final_biases')
            variable_summaries(layer_biases, layer_name + '/biases')
        with tf.name_scope('Wx_plus_b'):
            logits = tf.matmul(bottleneck_input, layer_weights) + layer_biases
            tf.histogram_summary(layer_name + '/pre_activations', logits)

    final_tensor = tf.nn.softmax(logits, name=final_tensor_name)
    tf.histogram_summary(final_tensor_name + '/activations', final_tensor)

    with tf.name_scope('cross_entropy'):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            logits, ground_truth_input)
        with tf.name_scope('total'):
            cross_entropy_mean = tf.reduce_mean(cross_entropy)
        tf.scalar_summary('cross entropy', cross_entropy_mean)

    with tf.name_scope('train'):
        train_step = tf.train.GradientDescentOptimizer(
            FLAGS.learning_rate).minimize(cross_entropy_mean)

    return (train_step, cross_entropy_mean, bottleneck_input,
            ground_truth_input, final_tensor)
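
The tensors returned by add_final_training_ops are meant to be driven from a feed-dict loop over cached bottleneck values. A hedged sketch of a single training step; class_count, bottleneck_tensor, and the train_bottlenecks/train_labels arrays stand in for whatever the caller supplies.

# Sketch: one gradient step on the new softmax head.
(train_step, cross_entropy, bottleneck_input,
 ground_truth_input, final_tensor) = add_final_training_ops(
     class_count, 'final_result', bottleneck_tensor)

sess = tf.Session()
sess.run(tf.initialize_all_variables())
_, loss_value = sess.run(
    [train_step, cross_entropy],
    feed_dict={bottleneck_input: train_bottlenecks,    # numpy [N, BOTTLENECK_TENSOR_SIZE]
               ground_truth_input: train_labels})      # numpy one-hot [N, class_count]
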
Example #51
def model(X, w_h, w_o, b, b2):
    # Add layer name scopes for better graph visualization
    with tf.name_scope("Layer2"):
        h = tf.nn.relu(tf.matmul(X, w_h) + b)
    with tf.name_scope("Layer3"):
        return tf.nn.softmax(tf.matmul(h, w_o) + b2)


#Initialize weights
w_h = init_weights([2, number_hidden_nodes], "w_h")
w_o = init_weights([number_hidden_nodes, 2], "w_o")
b = tf.Variable(tf.zeros([number_hidden_nodes]))
b2 = tf.Variable(tf.zeros([2]))

#Histogram summaries for weights
tf.histogram_summary("w_h_summ", w_h)
tf.histogram_summary("w_o_summ", w_o)

#Create Model (One hidden layer)
py_x = model(x, w_h, w_o, b, b2)

#Cost function
with tf.name_scope("cost") as scope:
    cost = -tf.reduce_sum(y_ * tf.log(py_x))
    train_op = tf.train.GradientDescentOptimizer(0.2).minimize(cost)
    # Add scalar summary for cost tensor
    tf.scalar_summary("cost", cost)

#Measure accuracy
with tf.name_scope("accuracy"):
    correct_pred = tf.equal(tf.argmax(expect, 1), tf.argmax(py_x, 1))
    def create_variables(self):
        self.target_actor = self.actor.copy(scope="target_actor")
        self.target_critic = self.critic.copy(scope="target_critic")

        # FOR REGULAR ACTION SCORE COMPUTATION
        with tf.name_scope("taking_action"):
            #            self.observation  = tf.placeholder(tf.float32, (None, self.observation_size), name="observation")
            self.actor_val = self.actor(self.observation)
            #            self.actor_val = tf.placeholder(tf.float32, (None, 20, 2), name="asd")
            #            self.actor_action = tf.identity(self.get_last(self.actor_val), name="actor_action")
            self.actor_action = tf.identity(self.actor.get_last(
                self.observation),
                                            name="actor_action")
#            self.actor_action = tf.identity([[1.1, 1.1]], name="actor_action")
#            tf.histogram_summary("actions", self.actor_action)

        # FOR PREDICTING TARGET FUTURE REWARDS
        with tf.name_scope("estimating_future_reward"):
            #            self.next_observation          = tf.placeholder(tf.float32, (None, self.observation_size), name="next_observation")
            self.next_observation_mask = tf.placeholder(
                tf.float32, (None, ), name="next_observation_mask")
            self.next_action = tf.stop_gradient(
                self.target_actor(self.next_observation))  # ST
            #            print "next action: " + str(self.next_action)
            tf.histogram_summary("target_actions", self.next_action)
            self.next_value = tf.stop_gradient(
                tf.reshape(
                    self.target_critic(
                        self.concat_nn_lstm_input(
                            self.next_observation,
                            self.add_pow_values(self.next_action))),
                    [-1]))  # ST
            self.rewards = tf.placeholder(tf.float32, (None, ), name="rewards")
            self.future_reward = self.rewards + self.discount_rate * self.next_observation_mask * self.next_value

        with tf.name_scope("critic_update"):
            ##### ERROR FUNCTION #####
            #            self.given_action               = tf.placeholder(tf.float32, (None, self.action_size), name="given_action")
            self.value_given_action = tf.reshape(
                self.critic(
                    self.concat_nn_lstm_input(
                        self.observation,
                        self.add_pow_values(self.given_action))), [-1])

            tf.scalar_summary("value_for_given_action",
                              tf.reduce_mean(self.value_given_action))
            temp_diff = self.value_given_action - self.future_reward

            self.critic_error = tf.identity(tf.reduce_mean(
                tf.square(temp_diff)),
                                            name='critic_error')
            ##### OPTIMIZATION #####
            critic_gradients = self.optimizer.compute_gradients(
                self.critic_error, var_list=self.critic.variables())
            # Add histograms for gradients.
            for grad, var in critic_gradients:
                tf.histogram_summary('critic_update/' + var.name, var)
                if grad is not None:
                    tf.histogram_summary(
                        'critic_update/' + var.name + '/gradients', grad)
            self.critic_update = self.optimizer.apply_gradients(
                critic_gradients, name='critic_train_op')
            tf.scalar_summary("critic_error", self.critic_error)

        with tf.name_scope("actor_update"):
            ##### ERROR FUNCTION #####
            self.actor_score = self.critic(
                self.concat_nn_lstm_input(self.observation,
                                          self.add_pow_values(self.actor_val)))

            ##### OPTIMIZATION #####
            # here we are maximizing actor score.
            # only optimize actor variables here, while keeping critic constant
            actor_gradients = self.optimizer.compute_gradients(
                tf.reduce_mean(-self.actor_score),
                var_list=self.actor.variables())
            # Add histograms for gradients.
            for grad, var in actor_gradients:
                tf.histogram_summary('actor_update/' + var.name, var)
                if grad is not None:
                    tf.histogram_summary(
                        'actor_update/' + var.name + '/gradients', grad)
            self.actor_update = self.optimizer.apply_gradients(
                actor_gradients, name='actor_train_op')
            tf.scalar_summary("actor_score", tf.reduce_mean(self.actor_score))

        # UPDATE TARGET NETWORK
        with tf.name_scope("target_network_update"):
            self.target_actor_update = ContinuousDeepQLSTMStepped.update_target_network(
                self.actor, self.target_actor, self.target_actor_update_rate)
            self.target_critic_update = ContinuousDeepQLSTMStepped.update_target_network(
                self.critic, self.target_critic,
                self.target_critic_update_rate)
            self.update_all_targets = tf.group(self.target_actor_update,
                                               self.target_critic_update,
                                               name='target_networks_update')

        self.summarize = tf.merge_all_summaries()
        self.no_op1 = tf.no_op()
Example #53
def main(_):
    # Download data if no local copy exists
    data_sets = input_data.read_data_sets(FLAGS.train_dir,
                                          one_hot=True,
                                          target_label=FLAGS.target_label)

    # Create the session
    sess = tf.InteractiveSession()

    # Input and label placeholders
    num_classes = data_sets.train.num_classes
    num_features = data_sets.train.num_features
    x = tf.placeholder('float', shape=[None, num_features], name='x-input')
    y_ = tf.placeholder('float', shape=[None, num_classes], name='y-input')
    keep_prob = tf.placeholder('float', name='k-prob')

    # Convolutional layer
    with tf.name_scope('conv'):
        W_conv = weight_variable([4, 4, 1, FLAGS.conv_fmaps])
        b_conv = bias_variable([FLAGS.conv_fmaps])

        # Reshape and convolve
        x_image = tf.reshape(x, [-1, 28, 28, 1])
        h_conv = tf.nn.relu(conv2d(x_image, W_conv) + b_conv)
        #h_pool = max_pool_2x2(h_conv)

    # Fully connected layer1
    with tf.name_scope('fc_1'):
        W_fc1 = weight_variable([13 * 13 * FLAGS.conv_fmaps, FLAGS.fc1_nodes])
        b_fc1 = bias_variable([FLAGS.fc1_nodes])

        # Reshape and apply relu
        #h_pool1_flat = tf.reshape(h_pool, [-1, 7 * 7 * FLAGS.conv_fmaps])
        #h_fc1 = tf.nn.relu(tf.matmul(h_pool1_flat, W_fc1) + b_fc1)
        # If the max_pool operation is ignored...
        h_flat = tf.reshape(h_conv, [-1, 13 * 13 * FLAGS.conv_fmaps])
        h_fc1 = tf.nn.relu(tf.matmul(h_flat, W_fc1) + b_fc1)

    # Fully connected layer2
    with tf.name_scope('fc_2'):
        W_fc2 = weight_variable([FLAGS.fc1_nodes, FLAGS.fc2_nodes])
        b_fc2 = bias_variable([FLAGS.fc2_nodes])

        # Apply relu
        h_fc2 = tf.nn.relu(tf.matmul(h_fc1, W_fc2) + b_fc2)

        # Apply dropout to fc_2 output
        h_fc2_drop = tf.nn.dropout(h_fc2, keep_prob)

    # Readout layer
    with tf.name_scope('readout'):
        W_out = weight_variable([FLAGS.fc2_nodes, num_classes])
        b_out = bias_variable([num_classes])

        y = tf.nn.softmax(tf.matmul(h_fc2_drop, W_out) + b_out)

    # Add summary ops for tensorboard
    _ = tf.histogram_summary('W_conv', W_conv)
    _ = tf.histogram_summary('W_fc1', W_fc1)
    _ = tf.histogram_summary('W_fc2', W_fc2)
    _ = tf.histogram_summary('W_out', W_out)
    _ = tf.histogram_summary('b_conv', b_conv)
    _ = tf.histogram_summary('b_fc1', b_fc1)
    _ = tf.histogram_summary('b_fc2', b_fc2)
    _ = tf.histogram_summary('b_out', b_out)
    _ = tf.histogram_summary('Output', y)

    # Cost function
    with tf.name_scope('xent'):
        x_entropy = -tf.reduce_sum(y_ * tf.log(y))
        _ = tf.scalar_summary('xentropy', x_entropy)

    # Train the model
    with tf.name_scope('train'):
        train_step = tf.train.AdamOptimizer(
            FLAGS.learning_rate).minimize(x_entropy)

    # Evaluate model
    with tf.name_scope('eval'):
        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
        _ = tf.scalar_summary('accuracy', accuracy)

    # Collect all summaries during graph building
    summary_op = tf.merge_all_summaries()
    summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                            graph_def=sess.graph_def)

    sess.run(tf.initialize_all_variables())

    # Train the model and record summaries
    for i in range(FLAGS.max_steps):
        if i % 50 == 0:
            # Generate a new feed dictionary to test training accuracy
            feed_dict = fill_feed_dict(data_sets.train, x, y_,
                                       (keep_prob, 1.0))
            # Update the summary collection
            result = sess.run([summary_op, accuracy], feed_dict=feed_dict)
            summary_str = result[0]
            summary_writer.add_summary(summary_str, i)
            train_accuracy = result[1]
            # Print status update
            print('step %d, training accuracy %g' % (i, train_accuracy))
        else:
            # Generate a new feed dictionary for the next training batch
            feed_dict = fill_feed_dict(data_sets.train, x, y_,
                                       (keep_prob, FLAGS.keep_prob))
            sess.run(train_step, feed_dict=feed_dict)

    print('test accuracy %.4f' % accuracy.eval(feed_dict={
        x: data_sets.test.images,
        y_: data_sets.test.labels,
        keep_prob: 1.0
    }))
                learning_rate = tf.train.exponential_decay(
                    starter_learning_rate,
                    global_step,
                    300,
                    0.96,
                    staircase=True)

                #opt  = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
                opt = tf.train.AdagradOptimizer(learning_rate=learning_rate)
                #opt = tf.train.AdamOptimizer()
                train_step = opt.minimize(
                    loss, var_list=[coeff],
                    global_step=global_step)  # ,[mean,sigma]

                if options.log is not None:
                    coeff_hist = tf.histogram_summary("coeff", coeff)
                    loss_summary = tf.scalar_summary("loss", loss)
                    learning_rate_summary = tf.scalar_summary(
                        "learning_rate", learning_rate)

            if options.log is not None:
                summary_op = tf.merge_all_summaries()

            init = tf.initialize_all_variables()
            sess = tf.Session(config=tf_config)

            if options.log is not None:
                print("Writing log to {}".format(options.log))
                writer = tf.train.SummaryWriter(options.log, sess.graph_def)

            sess.run(init)
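
tf.train.exponential_decay above computes starter_learning_rate * 0.96 ** (global_step / 300), and staircase=True floors the exponent so the rate drops in discrete steps. A quick plain-Python sketch of that schedule, with the constants copied from the call above:

# Sketch of the staircase exponential-decay schedule used above.
def decayed_lr(starter_learning_rate, global_step,
               decay_steps=300, decay_rate=0.96, staircase=True):
    exponent = global_step / float(decay_steps)
    if staircase:
        exponent = int(exponent)      # floor -> piecewise-constant schedule
    return starter_learning_rate * decay_rate ** exponent

# e.g. decayed_lr(0.1, 299) == 0.1, while decayed_lr(0.1, 300) == 0.1 * 0.96
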
def train(target, dataset, cluster_spec):
    """Train Inception on a dataset for a number of steps."""
    # Number of workers and parameter servers are inferred from the workers and ps
    # hosts string.
    num_workers = len(cluster_spec.as_dict()['worker'])
    num_parameter_servers = len(cluster_spec.as_dict()['ps'])
    # If no value is given, num_replicas_to_aggregate defaults to be the number of
    # workers.
    if FLAGS.num_replicas_to_aggregate == -1:
        num_replicas_to_aggregate = num_workers
    else:
        num_replicas_to_aggregate = FLAGS.num_replicas_to_aggregate

    # Both should be greater than 0 in a distributed training.
    assert num_workers > 0 and num_parameter_servers > 0, (
        ' num_workers and '
        'num_parameter_servers'
        ' must be > 0.')

    # Choose worker 0 as the chief. Note that any worker could be the chief
    # but there should be only one chief.
    is_chief = (FLAGS.task_id == 0)

    # Ops are assigned to worker by default.
    with tf.device('/job:worker/task:%d' % FLAGS.task_id):
        # Variables and its related init/assign ops are assigned to ps.
        with slim.scopes.arg_scope(
            [slim.variables.variable, slim.variables.global_step],
                device=slim.variables.VariableDeviceChooser(
                    num_parameter_servers)):
            # Create a variable to count the number of train() calls. This equals the
            # number of updates applied to the variables.
            global_step = slim.variables.global_step()

            # Calculate the learning rate schedule.
            num_batches_per_epoch = (dataset.num_examples_per_epoch() /
                                     FLAGS.batch_size)
            # Decay steps need to be divided by the number of replicas to aggregate.
            decay_steps = int(num_batches_per_epoch *
                              FLAGS.num_epochs_per_decay /
                              num_replicas_to_aggregate)

            # Decay the learning rate exponentially based on the number of steps.
            lr = tf.train.exponential_decay(FLAGS.initial_learning_rate,
                                            global_step,
                                            decay_steps,
                                            FLAGS.learning_rate_decay_factor,
                                            staircase=True)
            # Add a summary to track the learning rate.
            tf.scalar_summary('learning_rate', lr)

            # Create an optimizer that performs gradient descent.
            opt = tf.train.RMSPropOptimizer(lr,
                                            RMSPROP_DECAY,
                                            momentum=RMSPROP_MOMENTUM,
                                            epsilon=RMSPROP_EPSILON)

            images, labels = image_processing.distorted_inputs(
                dataset,
                batch_size=FLAGS.batch_size,
                num_preprocess_threads=FLAGS.num_preprocess_threads)

            # Number of classes in the Dataset label set plus 1.
            # Label 0 is reserved for an (unused) background class.
            num_classes = dataset.num_classes() + 1
            logits = inception.inference(images,
                                         num_classes,
                                         for_training=True)
            # Add classification loss.
            inception.loss(logits, labels)

            # Gather all of the losses including regularization losses.
            losses = tf.get_collection(slim.losses.LOSSES_COLLECTION)
            losses += tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)

            total_loss = tf.add_n(losses, name='total_loss')

            if is_chief:
                # Compute the moving average of all individual losses and the
                # total loss.
                loss_averages = tf.train.ExponentialMovingAverage(0.9,
                                                                  name='avg')
                loss_averages_op = loss_averages.apply(losses + [total_loss])

                # Attach a scalar summary to all individual losses and the total loss;
                # do the same for the averaged version of the losses.
                for l in losses + [total_loss]:
                    loss_name = l.op.name
                    # Name each loss as '(raw)' and name the moving average version of the
                    # loss as the original loss name.
                    tf.scalar_summary(loss_name + ' (raw)', l)
                    tf.scalar_summary(loss_name, loss_averages.average(l))

                # Add dependency to compute loss_averages.
                with tf.control_dependencies([loss_averages_op]):
                    total_loss = tf.identity(total_loss)

            # Track the moving averages of all trainable variables.
            # Note that we maintain a 'double-average' of the BatchNormalization
            # global statistics.
            # This is not needed when the number of replicas is small, but it is
            # important for synchronous distributed training with tens of
            # workers/replicas.
            exp_moving_averager = tf.train.ExponentialMovingAverage(
                inception.MOVING_AVERAGE_DECAY, global_step)

            variables_to_average = (tf.trainable_variables() +
                                    tf.moving_average_variables())

            # Add histograms for model variables.
            for var in variables_to_average:
                tf.histogram_summary(var.op.name, var)

            # Create synchronous replica optimizer.
            opt = tf.train.SyncReplicasOptimizer(
                opt,
                replicas_to_aggregate=num_replicas_to_aggregate,
                replica_id=FLAGS.task_id,
                total_num_replicas=num_workers,
                variable_averages=exp_moving_averager,
                variables_to_average=variables_to_average)

            batchnorm_updates = tf.get_collection(
                slim.ops.UPDATE_OPS_COLLECTION)
            assert batchnorm_updates, 'Batchnorm updates are missing'
            batchnorm_updates_op = tf.group(*batchnorm_updates)
            # Add dependency to compute batchnorm_updates.
            with tf.control_dependencies([batchnorm_updates_op]):
                total_loss = tf.identity(total_loss)

            # Compute gradients with respect to the loss.
            grads = opt.compute_gradients(total_loss)

            # Add histograms for gradients.
            for grad, var in grads:
                if grad is not None:
                    tf.histogram_summary(var.op.name + '/gradients', grad)

            apply_gradients_op = opt.apply_gradients(grads,
                                                     global_step=global_step)

            with tf.control_dependencies([apply_gradients_op]):
                train_op = tf.identity(total_loss, name='train_op')

            # Get the chief queue_runners, init_tokens and clean_up_op, which are
            # used to synchronize replicas.
            # More details can be found in sync_replicas_optimizer.
            chief_queue_runners = [opt.get_chief_queue_runner()]
            init_tokens_op = opt.get_init_tokens_op()
            clean_up_op = opt.get_clean_up_op()

            # Create a saver.
            saver = tf.train.Saver()

            # Build the summary operation based on the TF collection of Summaries.
            summary_op = tf.merge_all_summaries()

            # Build an initialization operation to run below.
            init_op = tf.initialize_all_variables()

            # We run the summaries in the same thread as the training operations by
            # passing in None for summary_op to avoid a summary_thread being started.
            # Running summaries and training operations in parallel could run out of
            # GPU memory.
            sv = tf.train.Supervisor(is_chief=is_chief,
                                     logdir=FLAGS.train_dir,
                                     init_op=init_op,
                                     summary_op=None,
                                     global_step=global_step,
                                     saver=saver,
                                     save_model_secs=FLAGS.save_interval_secs)

            tf.logging.info('%s Supervisor' % datetime.now())

            sess_config = tf.ConfigProto(
                allow_soft_placement=True,
                log_device_placement=FLAGS.log_device_placement)

            # Get a session.
            sess = sv.prepare_or_wait_for_session(target, config=sess_config)

            # Start the queue runners.
            queue_runners = tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS)
            sv.start_queue_runners(sess, queue_runners)
            tf.logging.info('Started %d queues for processing input data.',
                            len(queue_runners))

            if is_chief:
                sv.start_queue_runners(sess, chief_queue_runners)
                sess.run(init_tokens_op)

            # Train, checking for NaNs. Concurrently run the summary operation at a
            # specified interval. Note that the summary_op and train_op never run
            # simultaneously in order to prevent running out of GPU memory.
            next_summary_time = time.time() + FLAGS.save_summaries_secs
            step = 0
            while (not sv.should_stop()) and step <= 2000:
                try:

                    start_time = time.time()
                    run_metadata = tf.RunMetadata()
                    profile_step = 60
                    trace_done = False

                    if step == profile_step:
                        tf.logging.info("Tracing at step %d" % step)
                        loss_value, step = sess.run(
                            [train_op, global_step],
                            options=tf.RunOptions(
                                trace_level=tf.RunOptions.FULL_TRACE),
                            run_metadata=run_metadata)
                        trace_done = True
                    else:
                        loss_value, step = sess.run([train_op, global_step])

                    duration = time.time() - start_time

                    if trace_done:
                        trace = timeline.Timeline(
                            step_stats=run_metadata.step_stats)
                        # Write the trace in Chrome format; view it via chrome://tracing.
                        with open('/tmp/timeline.ctf.json', 'w') as trace_file:
                            trace_file.write(
                                trace.generate_chrome_trace_format())

                    assert not np.isnan(
                        loss_value), 'Model diverged with loss = NaN'
                    if step > FLAGS.max_steps:
                        break

                    examples_per_sec = FLAGS.batch_size / float(duration)
                    format_str = ('Worker %d: %s: step %d, loss = %.2f '
                                  '(%.1f examples/sec; %.3f sec/batch)')
                    if step >= 10 and step != profile_step + 1:
                        tf.logging.info(
                            format_str %
                            (FLAGS.task_id, datetime.now(), step, loss_value,
                             examples_per_sec, duration))
                    else:
                        tf.logging.info(
                            'Not considering step %d (%.1f samples/sec)' %
                            (step, examples_per_sec))

                    # Determine if the summary_op should be run on the chief worker.
                    # if is_chief and next_summary_time < time.time():
                    #     tf.logging.info('Running Summary operation on the chief.')
                    #     summary_str = sess.run(summary_op)
                    #     sv.summary_computed(sess, summary_str)
                    #     tf.logging.info('Finished running Summary operation.')
                    #
                    #     # Determine the next time for running the summary.
                    #     next_summary_time += FLAGS.save_summaries_secs
                except:
                    if is_chief:
                        tf.logging.info('About to execute sync_clean_up_op!')
                        sess.run(clean_up_op)
                    raise

            # Stop the supervisor.  This also waits for service threads to finish.
            sv.stop()

            # Save after the training ends.
            if is_chief:
                saver.save(sess,
                           os.path.join(FLAGS.train_dir, 'model.ckpt'),
                           global_step=global_step)
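# The tracing branch inside the training loop above is easy to miss; the helper
# below is a minimal, hedged sketch of the same pattern in isolation. The name
# trace_one_step and the default trace path are illustrative assumptions, not
# part of the original listing.
from tensorflow.python.client import timeline


def trace_one_step(sess, train_op, trace_path='/tmp/timeline.ctf.json'):
    """Run a single training step with full tracing and dump a Chrome trace."""
    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()
    sess.run(train_op, options=run_options, run_metadata=run_metadata)
    # Convert the collected step stats to the Chrome trace format; the resulting
    # JSON file can be opened in chrome://tracing for inspection.
    trace = timeline.Timeline(step_stats=run_metadata.step_stats)
    with open(trace_path, 'w') as trace_file:
        trace_file.write(trace.generate_chrome_trace_format())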
def _activation_summary(tensor):
    """Attach histogram and zero-fraction (sparsity) summaries to an activation tensor."""
    tensor_name = re.sub("%s_[0-9]*/" % TOWER_NAME, "", tensor.op.name)
    tf.histogram_summary(tensor_name + "/activations", tensor)
    tf.scalar_summary(tensor_name + "/sparsity", tf.nn.zero_fraction(tensor))
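# A hedged usage sketch for _activation_summary: call it right after a layer's
# activation tensor is created so its histogram and sparsity summaries appear in
# TensorBoard. The tower scope and dummy conv1 tensor below are illustrative
# assumptions (TOWER_NAME is assumed to be defined as in the snippet above).
with tf.name_scope('%s_0' % TOWER_NAME):
    conv1 = tf.nn.relu(tf.zeros([128, 32, 32, 64]), name='conv1')
_activation_summary(conv1)  # records 'conv1/activations' and 'conv1/sparsity'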
Example #57
def run_train(w2vsource, w2vdim, w2vnumfilters, lexdim, lexnumfilters,
              randomseed, datasource, model_name, trainable, the_epoch):

    np.random.seed(randomseed)
    max_len = 60
    norm_model = []

    with Timer("lex"):
        print('new way of loading lexicon')
        default_vector_dic = {
            'EverythingUnigramsPMIHS': [0],
            'HS-AFFLEX-NEGLEX-unigrams': [0, 0, 0],
            'Maxdiff-Twitter-Lexicon_0to1': [0.5],
            'S140-AFFLEX-NEGLEX-unigrams': [0, 0, 0],
            'unigrams-pmilexicon': [0, 0, 0],
            'unigrams-pmilexicon_sentiment_140': [0, 0, 0],
            'BL': [0]
        }

        lexfile_list = [
            'EverythingUnigramsPMIHS.pickle',
            'HS-AFFLEX-NEGLEX-unigrams.pickle',
            'Maxdiff-Twitter-Lexicon_0to1.pickle',
            'S140-AFFLEX-NEGLEX-unigrams.pickle', 'unigrams-pmilexicon.pickle',
            'unigrams-pmilexicon_sentiment_140.pickle', 'BL.pickle'
        ]

        for idx, lexfile in enumerate(lexfile_list):
            fname = '../data/le/%s' % lexfile
            print('default lexicon for %s' % lexfile)

            with open(fname, 'rb') as handle:
                each_model = pickle.load(handle)
                default_vector = default_vector_dic[lexfile.replace(
                    '.pickle', '')]
                each_model["<PAD/>"] = default_vector
                norm_model.append(each_model)

    unigram_lexicon_model = norm_model

    # CONFIGURE
    # ==================================================
    if datasource == 'semeval':
        numberofclass = 3
        use_rotten_tomato = False
    elif datasource == 'sst':
        numberofclass = 5
        use_rotten_tomato = True

    # Training
    # ==================================================
    if randomseed > 0:
        tf.set_random_seed(randomseed)
    with tf.Graph().as_default():
        tf.set_random_seed(randomseed)
        max_af1_dev = 0
        index_at_max_af1_dev = 0
        af1_tst_at_max_af1_dev = 0

        #WORD2VEC
        x_text, y = cnn_data_helpers.load_data_trainable(
            "everydata", rottenTomato=use_rotten_tomato)
        max_document_length = max([len(x.split(" ")) for x in x_text])
        vocab_processor = learn.preprocessing.VocabularyProcessor(
            max_document_length)
        vocab_processor.fit_transform(x_text)
        total_vocab_size = len(vocab_processor.vocabulary_)

        x_train, y_train = cnn_data_helpers.load_data_trainable(
            "trn", rottenTomato=use_rotten_tomato)
        x_dev, y_dev = cnn_data_helpers.load_data_trainable(
            "dev", rottenTomato=use_rotten_tomato)
        x_test, y_test = cnn_data_helpers.load_data_trainable(
            "tst", rottenTomato=use_rotten_tomato)
        x_train = np.array(list(vocab_processor.fit_transform(x_train)))
        x_dev = np.array(list(vocab_processor.fit_transform(x_dev)))
        x_test = np.array(list(vocab_processor.fit_transform(x_test)))

        del norm_model
        gc.collect()

        print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            if randomseed > 0:
                tf.set_random_seed(randomseed)

            cnn = W2V_TRAINABLE(sequence_length=x_train.shape[1],
                                num_classes=numberofclass,
                                vocab_size=len(vocab_processor.vocabulary_),
                                is_trainable=trainable,
                                embedding_size=w2vdim,
                                filter_sizes=list(
                                    map(int, FLAGS.filter_sizes.split(","))),
                                num_filters=w2vnumfilters,
                                embedding_size_lex=lexdim,
                                num_filters_lex=lexnumfilters,
                                themodel=model_name,
                                l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.histogram_summary(
                        "{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.scalar_summary(
                        "{}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.merge_summary(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss, accuracy and average F1
            loss_summary = tf.scalar_summary("loss", cnn.loss)
            acc_summary = tf.scalar_summary("accuracy", cnn.accuracy)
            f1_summary = tf.scalar_summary("avg_f1", cnn.avg_f1)

            # Train Summaries
            train_summary_op = tf.merge_summary(
                [loss_summary, acc_summary, f1_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.train.SummaryWriter(
                train_summary_dir, sess.graph_def)

            # Dev summaries
            dev_summary_op = tf.merge_summary(
                [loss_summary, acc_summary, f1_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.train.SummaryWriter(
                dev_summary_dir, sess.graph_def)

            # Test summaries
            test_summary_op = tf.merge_summary(
                [loss_summary, acc_summary, f1_summary])
            test_summary_dir = os.path.join(out_dir, "summaries", "test")
            test_summary_writer = tf.train.SummaryWriter(
                test_summary_dir, sess.graph_def)

            # Checkpoint directory. TensorFlow assumes this directory already exists, so we need to create it.
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.all_variables())

            # Initialize all variables
            sess.run(tf.initialize_all_variables())
            the_base_path = '../data/emory_w2v/'
            if w2vsource == "twitter":
                the_model_path = the_base_path + 'w2v-%d.bin' % w2vdim
            elif w2vsource == "amazon":
                the_model_path = the_base_path + 'w2v-%d-%s.bin' % (w2vdim,
                                                                    w2vsource)

            # initW is zero-initialized here and filled with word2vec vectors below;
            # initW_lex gets small random uniform values for the lexicon embeddings.
            initW = np.random.uniform(0.0, 0.0, (total_vocab_size, w2vdim))
            initW_lex = np.random.uniform(0.00, 0.2,
                                          (total_vocab_size, lexdim))
            # load any vectors from the word2vec
            with Timer("LOADING W2V..."):
                print("LOADING word2vec file {} \n".format(the_model_path))
                #W2V
                with open(the_model_path, "rb") as f:
                    header = f.readline()
                    vocab_size, layer1_size = map(int, header.split())
                    binary_len = np.dtype('float32').itemsize * layer1_size
                    for line in xrange(vocab_size):
                        word = []
                        while True:
                            ch = f.read(1)
                            if ch == ' ':
                                word = ''.join(word)
                                break
                            if ch != '\n':
                                word.append(ch)
                        idx = vocab_processor.vocabulary_.get(word)
                        if idx != 0:
                            #print str(idx) + " -> " + word
                            initW[idx] = np.fromstring(f.read(binary_len),
                                                       dtype='float32')
                        else:
                            f.read(binary_len)
            with Timer("LOADING LEXICON..."):
                vocabulary_set = set()
                for index, eachModel in enumerate(unigram_lexicon_model):
                    for word in eachModel:
                        vocabulary_set.add(word)

                for word in vocabulary_set:
                    lexiconList = np.empty([0, 1])
                    for index, eachModel in enumerate(unigram_lexicon_model):
                        if word in eachModel:
                            temp = np.array(np.float32(eachModel[word]))
                        else:
                            temp = np.array(np.float32(eachModel["<PAD/>"]))
                        lexiconList = np.append(lexiconList, temp)

                    idx = vocab_processor.vocabulary_.get(word)
                    if idx != 0:
                        initW_lex[idx] = lexiconList

            sess.run(cnn.W.assign(initW))
            if model_name == 'w2v_lex':
                sess.run(cnn.W_lex.assign(initW_lex))

            def train_step(x_batch, y_batch):
                """
                A single training step
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy, neg_r, neg_p, f1_neg, f1_pos, avg_f1 = sess.run(
                    [
                        train_op, global_step, train_summary_op, cnn.loss,
                        cnn.accuracy, cnn.neg_r, cnn.neg_p, cnn.f1_neg,
                        cnn.f1_pos, cnn.avg_f1
                    ], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, writer=None, score_type='f1'):
                """
                Evaluates model on a dev set
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy, neg_r, neg_p, f1_neg, f1_pos, avg_f1 = sess.run(
                    [
                        global_step, dev_summary_op, cnn.loss, cnn.accuracy,
                        cnn.neg_r, cnn.neg_p, cnn.f1_neg, cnn.f1_pos,
                        cnn.avg_f1
                    ], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print(
                    "{} : {} step {}, loss {:g}, acc {:g}, neg_r {:g} neg_p {:g} f1_neg {:g}, f1_pos {:g}, f1 {:g}"
                    .format("DEV", time_str, step, loss, accuracy, neg_r,
                            neg_p, f1_neg, f1_pos, avg_f1))
                if writer:
                    writer.add_summary(summaries, step)

                if score_type == 'f1':
                    return avg_f1
                else:
                    return accuracy

            def test_step(x_batch, y_batch, writer=None, score_type='f1'):
                """
                Evaluates model on a test set
                """

                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy, neg_r, neg_p, f1_neg, f1_pos, avg_f1 = sess.run(
                    [
                        global_step, dev_summary_op, cnn.loss, cnn.accuracy,
                        cnn.neg_r, cnn.neg_p, cnn.f1_neg, cnn.f1_pos,
                        cnn.avg_f1
                    ], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print(
                    "{} : {} step {}, loss {:g}, acc {:g}, neg_r {:g} neg_p {:g} f1_neg {:g}, f1_pos {:g}, f1 {:g}"
                    .format("TEST", time_str, step, loss, accuracy, neg_r,
                            neg_p, f1_neg, f1_pos, avg_f1))
                if writer:
                    writer.add_summary(summaries, step)

                if score_type == 'f1':
                    return avg_f1
                else:
                    return accuracy

            # Generate batches
            batches = cnn_data_helpers.batch_iter(list(zip(x_train, y_train)),
                                                  FLAGS.batch_size, the_epoch)

            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)

                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:

                    print("Evaluation:")

                    if datasource == 'semeval':
                        curr_af1_dev = dev_step(x_dev,
                                                y_dev,
                                                writer=dev_summary_writer)
                        curr_af1_tst = test_step(x_test,
                                                 y_test,
                                                 writer=test_summary_writer)

                    elif datasource == 'sst':
                        curr_af1_dev = dev_step(x_dev,
                                                y_dev,
                                                writer=dev_summary_writer,
                                                score_type='acc')
                        curr_af1_tst = test_step(x_test,
                                                 y_test,
                                                 writer=test_summary_writer,
                                                 score_type='acc')

                    if curr_af1_dev > max_af1_dev:
                        max_af1_dev = curr_af1_dev
                        index_at_max_af1_dev = current_step
                        af1_tst_at_max_af1_dev = curr_af1_tst

                    print('Status: [%d] Max f1 for dev (%f), Max f1 for tst (%f)\n' %
                          (index_at_max_af1_dev, max_af1_dev,
                           af1_tst_at_max_af1_dev))
                    sys.stdout.flush()
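# cnn_data_helpers.batch_iter is used above but not reproduced in this listing.
# A common implementation is a generator that shuffles the data once per epoch
# and yields fixed-size slices; the actual helper may differ in details, so
# treat this as an illustrative sketch only.
def batch_iter(data, batch_size, num_epochs, shuffle=True):
    data = np.array(data)
    data_size = len(data)
    num_batches_per_epoch = int((data_size - 1) / batch_size) + 1
    for epoch in range(num_epochs):
        # Reshuffle the data at the start of every epoch.
        if shuffle:
            shuffled_data = data[np.random.permutation(np.arange(data_size))]
        else:
            shuffled_data = data
        for batch_num in range(num_batches_per_epoch):
            start_index = batch_num * batch_size
            end_index = min((batch_num + 1) * batch_size, data_size)
            yield shuffled_data[start_index:end_index]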
def train():
  """Train CIFAR-10 for a number of steps."""
  with tf.Graph().as_default(), tf.device('/cpu:0'):
    # Create a variable to count the number of train() calls. This equals the
    # number of batches processed * FLAGS.num_gpus.
    global_step = tf.get_variable(
        'global_step', [],
        initializer=tf.constant_initializer(0), trainable=False)

    # Calculate the learning rate schedule.
    num_batches_per_epoch = (cifar10.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /
                             FLAGS.batch_size)
    decay_steps = int(num_batches_per_epoch * cifar10.NUM_EPOCHS_PER_DECAY)

    # Decay the learning rate exponentially based on the number of steps.
    lr = tf.train.exponential_decay(cifar10.INITIAL_LEARNING_RATE,
                                    global_step,
                                    decay_steps,
                                    cifar10.LEARNING_RATE_DECAY_FACTOR,
                                    staircase=True)

    # Create an optimizer that performs gradient descent.
    opt = tf.train.GradientDescentOptimizer(lr)

    # Calculate the gradients for each model tower.
    tower_grads = []
    for i in xrange(FLAGS.num_gpus):
      with tf.device('/gpu:%d' % i):
        with tf.name_scope('%s_%d' % (cifar10.TOWER_NAME, i)) as scope:
          # Calculate the loss for one tower of the CIFAR model. This function
          # constructs the entire CIFAR model but shares the variables across
          # all towers.
          loss = tower_loss(scope)

          # Reuse variables for the next tower.
          tf.get_variable_scope().reuse_variables()

          # Retain the summaries from the final tower.
          summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)

          # Calculate the gradients for the batch of data on this CIFAR tower.
          grads = opt.compute_gradients(loss)

          # Keep track of the gradients across all towers.
          tower_grads.append(grads)

    # We must calculate the mean of each gradient. Note that this is the
    # synchronization point across all towers.
    grads = average_gradients(tower_grads)

    # Add a summary to track the learning rate.
    summaries.append(tf.scalar_summary('learning_rate', lr))

    # Add histograms for gradients.
    for grad, var in grads:
      if grad is not None:
        summaries.append(
            tf.histogram_summary(var.op.name + '/gradients', grad))

    # Apply the gradients to adjust the shared variables.
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    # Add histograms for trainable variables.
    for var in tf.trainable_variables():
      summaries.append(tf.histogram_summary(var.op.name, var))

    # Track the moving averages of all trainable variables.
    variable_averages = tf.train.ExponentialMovingAverage(
        cifar10.MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    # Group all updates into a single train op.
    train_op = tf.group(apply_gradient_op, variables_averages_op)

    # Create a saver.
    saver = tf.train.Saver(tf.all_variables())

    # Build the summary operation from the last tower summaries.
    summary_op = tf.merge_summary(summaries)

    # Build an initialization operation to run below.
    init = tf.initialize_all_variables()

    # Start running operations on the Graph. allow_soft_placement must be set to
    # True to build towers on GPU, as some of the ops do not have GPU
    # implementations.
    sess = tf.Session(config=tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=FLAGS.log_device_placement))
    sess.run(init)

    # Start the queue runners.
    tf.train.start_queue_runners(sess=sess)

    summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                            graph_def=sess.graph_def)

    for step in xrange(FLAGS.max_steps):
      start_time = time.time()
      _, loss_value = sess.run([train_op, loss])
      duration = time.time() - start_time

      assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

      if step % 10 == 0:
        num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
        examples_per_sec = num_examples_per_step / duration
        sec_per_batch = duration / FLAGS.num_gpus

        format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                      'sec/batch)')
        print (format_str % (datetime.now(), step, loss_value,
                             examples_per_sec, sec_per_batch))

      if step % 100 == 0:
        summary_str = sess.run(summary_op)
        summary_writer.add_summary(summary_str, step)

      # Save the model checkpoint periodically.
      if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
        checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=step)
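# average_gradients is called above but not shown in this listing. The usual
# multi-tower pattern averages each variable's gradient across towers; the
# sketch below follows that pattern (using the old tf.concat(dim, values)
# signature to match the rest of this listing) and is an assumption, not the
# verbatim helper from the original source.
def average_gradients(tower_grads):
  """Average gradients for each shared variable across all towers."""
  average_grads = []
  for grad_and_vars in zip(*tower_grads):
    # Each grad_and_vars is ((grad0_gpu0, var0_gpu0), ..., (grad0_gpuN, var0_gpuN)).
    grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
    grad = tf.reduce_mean(tf.concat(0, grads), 0)
    # Variables are shared across towers, so the first tower's pointer suffices.
    average_grads.append((grad, grad_and_vars[0][1]))
  return average_grads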
Example #59
def training(DO_DEV_SPLIT, FLAGS, scheme_name, vocabulary, embed_matrix,
             x_train, x_dev, y_train, y_dev, num_filters, dropout_prob,
             l2_lambda, test_x, test_y):
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(
                sequence_length=x_train.shape[1],
                num_classes=FLAGS.num_classes,  # number of classification classes
                vocab_size=len(vocabulary),
                embedding_size=FLAGS.embedding_dim,
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                num_filters=num_filters,
                l2_reg_lambda=l2_lambda,
                init_embedding=embed_matrix)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            with tf.name_scope('grad_summary'):
                grad_summaries = []
                for g, v in grads_and_vars:
                    if g is not None:
                        grad_hist_summary = tf.histogram_summary(
                            "{}/grad/hist".format(v.name), g)
                        sparsity_summary = tf.scalar_summary(
                            "{}/grad/sparsity".format(v.name),
                            tf.nn.zero_fraction(g))
                        grad_summaries.append(grad_hist_summary)
                        grad_summaries.append(sparsity_summary)
                grad_summaries_merged = tf.merge_summary(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", scheme_name, timestamp))
            print(("Writing to {}\n".format(out_dir)))

            # Summaries for loss and accuracy
            loss_summary = tf.scalar_summary("loss", cnn.loss)
            pred_ratio_summary = []
            for i in range(FLAGS.num_classes):
                pred_ratio_summary.append(
                    tf.scalar_summary(
                        "prediction/label_" + str(i) + "_percentage",
                        cnn.rate_percentage[i]))
            acc_summary = tf.scalar_summary("accuracy", cnn.accuracy)

            # Train Summaries
            with tf.name_scope('train_summary'):
                train_summary_op = tf.merge_summary([
                    loss_summary, acc_summary, pred_ratio_summary,
                    grad_summaries_merged
                ])
                train_summary_dir = os.path.join(out_dir, "summaries", "train")
                train_summary_writer = tf.train.SummaryWriter(
                    train_summary_dir, sess.graph_def)

            # Dev summaries
            with tf.name_scope('dev_summary'):
                dev_summary_op = tf.merge_summary(
                    [loss_summary, acc_summary, pred_ratio_summary])
                dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
                dev_summary_writer = tf.train.SummaryWriter(
                    dev_summary_dir, sess.graph_def)

            # Checkpoint directory. TensorFlow assumes this directory already exists, so we need to create it.
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(var_list=tf.global_variables(),
                                   max_to_keep=7)

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

        def train_step(x_batch, y_batch):
            """
            A single training step
            """
            feed_dict = {
                cnn.input_x: x_batch,
                cnn.input_y: y_batch,
                cnn.dropout_keep_prob: dropout_prob
            }
            _, step, summaries, loss, accuracy = sess.run([
                train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy
            ], feed_dict)
            time_str = datetime.datetime.now().isoformat()
            print(("{}: step {}, loss {:g}, acc {:g}".format(
                time_str, step, loss, accuracy)))
            train_summary_writer.add_summary(summaries, step)

        def dev_step(x_batch, y_batch, writer=None):
            """
            Evaluates model on a dev set
            """
            feed_dict = {
                cnn.input_x: x_batch,
                cnn.input_y: y_batch,
                cnn.dropout_keep_prob: 1
            }
            step, summaries, loss, accuracy = sess.run(
                [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                feed_dict)
            time_str = datetime.datetime.now().isoformat()
            print(("{}: step {}, loss {:g}, acc {:g}".format(
                time_str, step, loss, accuracy)))
            if writer:
                writer.add_summary(summaries, step)

        # Generate batches
        batches = dh.DataHelperPan12.batch_iter(list(zip(x_train, y_train)),
                                                FLAGS.batch_size,
                                                FLAGS.num_epochs)

        if test_x is not None and test_y is not None:
            test_x_1 = test_x[:100]
            test_y_1 = test_y[:100]
            test_x_2 = test_x[100:200]
            test_y_2 = test_y[100:200]

        # Training loop. For each batch...
        for batch in batches:
            x_batch, y_batch = list(zip(*batch))
            train_step(x_batch, y_batch)
            current_step = tf.train.global_step(sess, global_step)
            if DO_DEV_SPLIT and current_step % FLAGS.evaluate_every == 0:
                print("\nEvaluation:")
                dev_batches = dh.DataHelperPan12.batch_iter(
                    list(zip(x_dev, y_dev)), 100, 1)
                for dev_batch in dev_batches:
                    if len(dev_batch) > 0:
                        small_dev_x, small_dev_y = list(zip(*dev_batch))
                        dev_step(small_dev_x,
                                 small_dev_y,
                                 writer=dev_summary_writer)
                        print("")
            elif test_x is not None and test_y is not None and current_step % 200 == 0:
                dev_step(test_x_1, test_y_1, writer=dev_summary_writer)
                dev_step(test_x_2, test_y_2, writer=dev_summary_writer)
            if current_step % FLAGS.checkpoint_every == 0:
                path = saver.save(sess,
                                  checkpoint_prefix,
                                  global_step=current_step)
                print(("Saved model checkpoint to {}\n".format(path)))
            if current_step == 3500:
                break
    return timestamp
Example #60
def monitor_train_vars(collections=None):
    """Add a histogram summary for every trainable variable, optionally into custom collections."""
    for var in tf.trainable_variables():
        tf.histogram_summary(var.op.name, var, collections=collections)
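# A hedged usage sketch for monitor_train_vars: passing a custom collection name
# keeps these histograms out of the default SUMMARIES collection, so they can be
# merged and evaluated separately. The collection name 'variable_summaries' is
# an example, not part of the original code.
monitor_train_vars(collections=['variable_summaries'])
variable_summary_op = tf.merge_all_summaries(key='variable_summaries')
# Summaries added to the default collection are merged as before:
# summary_op = tf.merge_all_summaries()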