Example #1
0
def sample_from_discretized_mix_logistic(l, nr_mix):
    """Sample from a discretized mixture of logistics parameterised by `l`.

    Args:
        l: parameter tensor, indexed here as 4-D. Its first `nr_mix` channels
            are the mixture logits; the remaining channels are reshaped to
            `[..., 3, nr_mix * 3]` and read as means, log-scales and
            (pre-tanh) coefficients, `nr_mix` entries each.
        nr_mix: number of mixture components.

    Returns:
        A tensor with the leading dimensions of `l` and a last dimension of
        3, every value clipped to [-1, 1].
    """
    full_shape = int_shape(l)
    sample_shape = full_shape[:-1] + [3]
    channel_shape = sample_shape[:-1] + [1]

    # Separate the mixture logits from the per-channel logistic parameters.
    mix_logits = l[:, :, :, :nr_mix]
    params = tf.reshape(l[:, :, :, nr_mix:], sample_shape + [nr_mix * 3])

    # Choose one component per position: perturb the logits with
    # -log(-log(u)) noise and take the argmax over the mixture axis.
    noise_u = tf.random_uniform(
        mix_logits.get_shape(), minval=1e-5, maxval=1. - 1e-5)
    component = tf.argmax(mix_logits - tf.log(-tf.log(noise_u)), 3)
    mask = tf.one_hot(component, depth=nr_mix, dtype=tf.float32)
    mask = tf.reshape(mask, sample_shape[:-1] + [1, nr_mix])

    # Keep only the parameters that belong to the chosen component.
    means = tf.reduce_sum(params[:, :, :, :, :nr_mix] * mask, 4)
    log_scales = tf.maximum(
        tf.reduce_sum(params[:, :, :, :, nr_mix:2 * nr_mix] * mask, 4), -7.)
    coeffs = tf.reduce_sum(
        tf.nn.tanh(params[:, :, :, :, 2 * nr_mix:3 * nr_mix]) * mask, 4)

    # Draw from the logistic; values stay continuous (they are not rounded
    # to the nearest 8-bit level).
    u = tf.random_uniform(means.get_shape(), minval=1e-5, maxval=1. - 1e-5)
    x = means + tf.exp(log_scales) * (tf.log(u) - tf.log(1. - u))

    def _clip(t):
        # Restrict a channel to the [-1, 1] interval.
        return tf.minimum(tf.maximum(t, -1.), 1.)

    # Channels 1 and 2 are shifted linearly by the already-sampled channels.
    x0 = _clip(x[:, :, :, 0])
    x1 = _clip(x[:, :, :, 1] + coeffs[:, :, :, 0] * x0)
    x2 = _clip(
        x[:, :, :, 2] + coeffs[:, :, :, 1] * x0 + coeffs[:, :, :, 2] * x1)
    return tf.concat([tf.reshape(c, channel_shape) for c in (x0, x1, x2)], 3)
Example #2
0
 def __init__(self,units,activation=None,mean=None,std=None,eur=False,no_bias=False):
     """Create the weight (and optional bias) variables of a dense layer.

     Args:
         units: pair `[n_in, n_out]` giving the weight matrix shape.
         activation: activation name; falls back to 'sigmoid' when None.
         mean: mean of the truncated-normal initializer (0.0 when None).
         std: stddev of the truncated-normal initializer
             (1 / sqrt(n_in) when None).
         eur: when true, pick a uniform initializer whose range depends on
             the activation name; unknown names use the truncated normal.
         no_bias: when true, no bias variable is created (`self.b` is None).
     """
     assert not(units is None),"You need to provide the number of units ([n_in,n_out])"
     mean = 0.0 if mean is None else mean
     if std is None:
         std = 1.0/(float(units[0])**0.5)

     self.n_in,self.n_out = units
     self.no_bias = no_bias
     self.activation = 'sigmoid' if activation is None else activation

     def _limit():
         # Uniform range shared by all activation-specific initializers.
         return (6.0/(self.n_in+self.n_out))**0.5

     relu_like = ("leaky_relu6", 'relu', 'relu6', "leaky_relu")
     if eur and self.activation == 'sigmoid':
         w_init = tf.random_uniform(units,minval=-4*_limit(),maxval=4*_limit())
     elif eur and self.activation in relu_like:
         # Rectifier-style activations start from non-negative weights.
         w_init = tf.random_uniform(units,minval=0,maxval=_limit())
     elif eur and self.activation == 'tanh':
         w_init = tf.random_uniform(units,minval=-_limit(),maxval=_limit())
     else:
         w_init = tf.truncated_normal(units,mean=mean,stddev=std)
     self.W = tf.Variable(w_init,name="W")

     if no_bias:
         self.b = None
     else:
         self.b = tf.Variable(tf.zeros([units[1]]),name="b")
  def testCustomGrad(self):
    """Gradients of a wrapped function must come from the custom grad_fn."""

    def fn(a, b, c):
      dense_out = tf.layers.dense(a, 10, use_bias=False)
      return dense_out + tf.matmul(b, c)

    def grad_fn(inputs, variables, unused_outputs, unused_grad_outputs):
      # Constant gradients: k-th input gets k+1, variables continue the count.
      offset = len(inputs)
      grad_inputs = [
          tf.ones_like(tensor) * (idx + 1.)
          for idx, tensor in enumerate(inputs)
      ]
      grad_vars = [
          tf.ones_like(var) * (idx + offset + 1.)
          for idx, var in enumerate(variables)
      ]
      return grad_inputs, grad_vars

    a = tf.random_uniform([11, 6])
    b = tf.random_uniform([11, 7])
    c = tf.random_uniform([7, 10])
    # Stand-in with the shape of the dense kernel, used only for expectations.
    w = tf.random_uniform([6, 10])

    wrapped = common_layers.fn_with_custom_grad(grad_fn)(fn)
    out = wrapped(a, b, c)
    loss = tf.reduce_mean(out)
    kernel = tf.trainable_variables()[0]
    grads = tf.gradients(loss, [a, b, c, kernel])
    expected_grads = [
        tf.ones_like(tensor) * (idx + 1.)
        for idx, tensor in enumerate([a, b, c, w])
    ]

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      g_val, eg_val = sess.run([grads, expected_grads])
      for actual, expected in zip(g_val, eg_val):
        self.assertAllClose(actual, expected)
    def __init__(self, args):
        """Build the sampling graph for the requested synthetic distribution.

        Args:
            args: object exposing `device`, `batch_size` and `distribution`.
                `distribution` must be one of 'circle', 'modes',
                'modal-gaussian', 'sin' or 'static-point'.

        Raises:
            ValueError: if `args.distribution` is not one of the names above.
        """
        with tf.device(args.device):
            def circle(x):
                # Divide every row by its Euclidean norm.
                spherenet = tf.square(x)
                spherenet = tf.reduce_sum(spherenet, 1)
                lam = tf.sqrt(spherenet)
                return x/tf.reshape(lam,[int(lam.get_shape()[0]), 1])

            def modes(x):
                # Snap every coordinate to the nearest multiple of 0.5.
                return tf.round(x*2)/2.0

            if args.distribution == 'circle':
                x = tf.random_normal([args.batch_size, 2])
                x = circle(x)
            elif args.distribution == 'modes':
                x = tf.random_uniform([args.batch_size, 2], -1, 1)
                x = modes(x)
            elif args.distribution == 'modal-gaussian':
                x = tf.random_uniform([args.batch_size, 2], -1, 1)
                y = tf.random_normal([args.batch_size, 2], stddev=0.04, mean=0.15)
                x = tf.round(x) + y
            elif args.distribution == 'sin':
                x = tf.random_uniform((1, args.batch_size), -10.5, 10.5 )
                x = tf.transpose(x)
                r_data = tf.random_normal((args.batch_size,1), mean=0, stddev=0.1)
                xy = tf.sin(0.75*x)*7.0+x*0.5+r_data*1.0
                x = tf.concat([xy,x], 1)/16.0
            elif args.distribution == 'static-point':
                x = tf.ones([args.batch_size, 2])
            else:
                # Previously fell through and failed later with an unbound
                # local; fail early with an actionable message instead.
                raise ValueError(
                    "Unknown distribution: {!r}".format(args.distribution))

            self.x = x
            self.xy = tf.zeros_like(self.x)
    def __init__(self, dim_image, n_words, dim_hidden, batch_size, n_lstm_steps, drop_out_rate, bias_init_vector=None):
        """Store the hyper-parameters and create the model's variables and cells.

        Args:
            dim_image: size of the image feature vector.
            n_words: vocabulary size.
            dim_hidden: hidden size shared by the embeddings and LSTM cells.
            batch_size: stored on the instance; not used in this constructor.
            n_lstm_steps: stored on the instance; not used in this constructor.
            drop_out_rate: dropout rate; the cells keep `1 - drop_out_rate`
                of their outputs.
            bias_init_vector: optional array used to initialise
                `embed_word_b`; zeros are used when it is None.
        """
        self.dim_image = dim_image
        self.n_words = n_words
        self.dim_hidden = dim_hidden
        self.batch_size = batch_size
        self.n_lstm_steps = n_lstm_steps
        self.drop_out_rate = drop_out_rate

        # The word embedding table is pinned to a fixed GPU.
        with tf.device("/gpu:2"):
            wemb_init = tf.random_uniform([n_words, dim_hidden], -0.1, 0.1)
            self.Wemb = tf.Variable(wemb_init, name='Wemb')

        # Two peephole LSTM cells, each wrapped with output dropout.
        keep_prob = 1 - self.drop_out_rate
        self.lstm1 = rnn_cell.LSTMCell(self.dim_hidden,self.dim_hidden,use_peepholes = True)
        self.lstm1_dropout = rnn_cell.DropoutWrapper(self.lstm1,output_keep_prob=keep_prob)
        self.lstm2 = rnn_cell.LSTMCell(self.dim_hidden,self.dim_hidden,use_peepholes = True)
        self.lstm2_dropout = rnn_cell.DropoutWrapper(self.lstm2,output_keep_prob=keep_prob)

        # Projection of image features into the hidden space (W: weight, b: bias).
        image_w_init = tf.random_uniform([dim_image, dim_hidden], -0.1, 0.1)
        self.encode_image_W = tf.Variable(image_w_init, name='encode_image_W')
        self.encode_image_b = tf.Variable(tf.zeros([dim_hidden]), name='encode_image_b')

        # Projection of hidden states onto the vocabulary.
        word_w_init = tf.random_uniform([dim_hidden, n_words], -0.1,0.1)
        self.embed_word_W = tf.Variable(word_w_init, name='embed_word_W')
        if bias_init_vector is None:
            word_b_init = tf.zeros([n_words])
        else:
            word_b_init = bias_init_vector.astype(np.float32)
        self.embed_word_b = tf.Variable(word_b_init, name='embed_word_b')
Example #6
0
def _generate_synthetic_snli_data_batch(sequence_length,
                                        batch_size,
                                        vocab_size):
  """Build one synthetic batch shaped like SNLI data, for use in tests.

  Args:
    sequence_length: first dimension of the random token tensors.
    batch_size: number of examples in the batch.
    vocab_size: exclusive upper bound for the random token ids.

  Returns:
    Tuple `(labels, prem, prem_trans, hypo, hypo_trans)`; tensors are moved
    to the GPU when `tfe.num_gpus()` is non-zero.
  """
  # Fixed 27-step transition sequence that every example shares.
  transitions = [3, 3, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3,
                 2, 3, 3, 2, 2, 3, 3, 3, 2, 2, 2, 2,
                 3, 2, 2]
  token_shape = (sequence_length, batch_size)

  def _transition_tensor():
    # One copy of the sequence per example, transposed to step-major layout.
    stacked = np.array([transitions] * batch_size, dtype=np.int64)
    return tf.constant(stacked.T)

  with tf.device("cpu:0"):
    labels = tf.random_uniform([batch_size], minval=1, maxval=4, dtype=tf.int64)
    prem = tf.random_uniform(token_shape, maxval=vocab_size, dtype=tf.int64)
    prem_trans = _transition_tensor()
    hypo = tf.random_uniform(token_shape, maxval=vocab_size, dtype=tf.int64)
    hypo_trans = _transition_tensor()

  if tfe.num_gpus():
    labels, prem, prem_trans, hypo, hypo_trans = [
        tensor.gpu()
        for tensor in (labels, prem, prem_trans, hypo, hypo_trans)
    ]
  return labels, prem, prem_trans, hypo, hypo_trans
  def testDiscretizedMixLogisticLoss(self):
    """Compare the library loss with a hand-computed single-component loss."""
    batch, height, width = 2, 4, 4
    channels = 3
    num_mixtures = 5
    spatial = [batch, height, width]
    param_shape = spatial + [num_mixtures * 3]

    # Put (effectively) all probability mass on the first mixture component.
    dominant = tf.ones(spatial + [1]) * 1e8
    others = tf.zeros(spatial + [num_mixtures - 1])
    logits = tf.concat([dominant, others], axis=-1)

    locs = tf.random_uniform(param_shape, minval=-.9, maxval=.9)
    log_scales = tf.random_uniform(param_shape, minval=-1., maxval=1.)
    coeffs = tf.atanh(tf.zeros(param_shape))
    pred = tf.concat([logits, locs, log_scales, coeffs], axis=-1)

    # Labels stay away from the edge cases where the 8-bit value is 0 or 255.
    labels = tf.random_uniform(spatial + [channels], minval=-.9, maxval=.9)

    # Reference loss computed from the first component's parameters only.
    first_locs = locs[..., :3]
    first_log_scales = log_scales[..., :3]
    centered = labels - first_locs
    inv_stdv = tf.exp(-first_log_scales)
    upper = inv_stdv * (centered + 1. / 255.)
    lower = inv_stdv * (centered - 1. / 255.)
    cdf_upper = tf.nn.sigmoid(upper)
    cdf_lower = tf.nn.sigmoid(lower)
    expected_loss = -tf.reduce_sum(tf.log(cdf_upper - cdf_lower), axis=-1)

    actual_loss = common_layers.discretized_mix_logistic_loss(
        pred=pred, labels=labels)
    actual_val, expected_val = self.evaluate([actual_loss, expected_loss])
    self.assertAllClose(actual_val, expected_val, rtol=1e-5)
Example #8
0
def _random_crop_and_flip(image, crop_height, crop_width):
  """Take a random crop of `image` and randomly mirror it left/right.

  Args:
    image: a 3-D image tensor.
    crop_height: height of the crop.
    crop_width: width of the crop.

  Returns:
    3-D tensor holding the cropped (and possibly flipped) image.
  """
  height, width = _get_h_w(image)

  # The offsets come from tf.random_uniform rather than numpy so that a new
  # crop position is drawn each time the graph is evaluated, not once when
  # the graph is built.
  slack_rows = height - crop_height
  top = tf.random_uniform([], maxval=slack_rows + 1, dtype=tf.int32)
  slack_cols = width - crop_width
  left = tf.random_uniform([], maxval=slack_cols + 1, dtype=tf.int32)

  begin = [top, left, 0]
  size = [crop_height, crop_width, -1]  # -1 keeps every channel
  window = tf.slice(image, begin, size)

  return tf.image.random_flip_left_right(window)
  def test_get_expected_feature_map_shapes_with_inception_v3(self):
    """Feature maps generated for the Inception V3 layout have known shapes."""
    input_shapes = (
        ('Mixed_5d', [4, 35, 35, 256]),
        ('Mixed_6e', [4, 17, 17, 576]),
        ('Mixed_7c', [4, 8, 8, 1024]),
    )
    image_features = {
        name: tf.random_uniform(shape, dtype=tf.float32)
        for name, shape in input_shapes
    }

    feature_maps = feature_map_generators.multi_resolution_feature_maps(
        feature_map_layout=INCEPTION_V3_LAYOUT,
        depth_multiplier=1,
        min_depth=32,
        insert_1x1_conv=True,
        image_features=image_features)

    expected_feature_map_shapes = {
        'Mixed_5d': (4, 35, 35, 256),
        'Mixed_6e': (4, 17, 17, 576),
        'Mixed_7c': (4, 8, 8, 1024),
        'Mixed_7c_2_Conv2d_3_3x3_s2_512': (4, 4, 4, 512),
        'Mixed_7c_2_Conv2d_4_3x3_s2_256': (4, 2, 2, 256),
        'Mixed_7c_2_Conv2d_5_3x3_s2_128': (4, 1, 1, 128),
    }

    init_op = tf.global_variables_initializer()
    with self.test_session() as sess:
      sess.run(init_op)
      evaluated = sess.run(feature_maps)
      actual_shapes = {name: fmap.shape for name, fmap in evaluated.items()}
      self.assertDictEqual(actual_shapes, expected_feature_map_shapes)
  def test_get_expected_feature_map_shapes_with_inception_v2(self, use_keras):
    """Feature maps generated for the Inception V2 layout have known shapes."""
    input_shapes = (
        ('Mixed_3c', [4, 28, 28, 256]),
        ('Mixed_4c', [4, 14, 14, 576]),
        ('Mixed_5c', [4, 7, 7, 1024]),
    )
    image_features = {
        name: tf.random_uniform(shape, dtype=tf.float32)
        for name, shape in input_shapes
    }
    feature_map_generator = self._build_feature_map_generator(
        feature_map_layout=INCEPTION_V2_LAYOUT,
        use_keras=use_keras
    )
    feature_maps = feature_map_generator(image_features)

    expected_feature_map_shapes = {
        'Mixed_3c': (4, 28, 28, 256),
        'Mixed_4c': (4, 14, 14, 576),
        'Mixed_5c': (4, 7, 7, 1024),
        'Mixed_5c_2_Conv2d_3_3x3_s2_512': (4, 4, 4, 512),
        'Mixed_5c_2_Conv2d_4_3x3_s2_256': (4, 2, 2, 256),
        'Mixed_5c_2_Conv2d_5_3x3_s2_256': (4, 1, 1, 256),
    }

    init_op = tf.global_variables_initializer()
    with self.test_session() as sess:
      sess.run(init_op)
      evaluated = sess.run(feature_maps)
      actual_shapes = {name: fmap.shape for name, fmap in evaluated.items()}
      self.assertDictEqual(expected_feature_map_shapes, actual_shapes)
	def denseNet(self, hidden=20, depth=3, act=tf.nn.tanh, dropout=True, norm=None):
		"""Append `depth` dense layers, each fed with the concatenation of the
		starting input and the outputs of all layers added before it.

		Args:
			hidden: width of every layer added here.
			depth: number of layers to add.
			act: activation applied to each layer's output; skipped when falsy.
			dropout: when truthy, apply dropout with `self.keep_prob`.
			norm: when truthy, pass each layer's output through `self.norm`.
		"""
		if hidden > 100:
			print("WARNING: denseNet uses quadratic mem for " + str(hidden))
		if depth < 3:
			print(
				"WARNING: did you mean to use Fully connected layer 'dense'? Expecting depth>3 vs " + str(depth))
		width = hidden
		stacked = self.last_layer
		stacked_width = self.last_width
		remaining = depth
		while remaining > 0:
			with tf.name_scope('DenNet_{:d}'.format(width)):
				print("dense width ", stacked_width, "x", width)
				tag = str(len(self.layers))
				limit = 1. / width
				weights = tf.Variable(
					tf.random_uniform([stacked_width, width], minval=-limit, maxval=limit),
					name="weights")
				bias = tf.Variable(
					tf.random_uniform([width], minval=-limit, maxval=limit),
					name="bias")
				layer_out = tf.matmul(stacked, weights, name='dense_' + tag) + bias

				# Summaries are recorded on the pre-activation output.
				tf.summary.histogram('dense_' + tag, layer_out)
				tf.summary.histogram('dense_' + tag + '/sparsity', tf.nn.zero_fraction(layer_out))
				tf.summary.histogram('weights_' + tag, weights)
				tf.summary.histogram('weights_' + tag + '/sparsity', tf.nn.zero_fraction(weights))
				tf.summary.histogram('bias_' + tag, bias)

				if act:
					layer_out = act(layer_out)
				if norm:
					layer_out = self.norm(layer_out, lsize=1)
				if dropout:
					layer_out = tf.nn.dropout(layer_out, self.keep_prob)
				self.add(layer_out)
				self.last_width = width

				# The next layer also sees this layer's output.
				stacked = tf.concat(1, [stacked, layer_out])
				stacked_width += width
				remaining -= 1
		self.last_width = width
Example #12
0
def random_batch(batch_size, config):
  """Return a batch of uniform-random images and random integer labels.

  Args:
    batch_size: number of examples in the batch.
    config: object exposing `input_shape` (a tuple) and `n_classes`.

  Returns:
    Tuple `(images, labels)`; labels are int32 values in [0, n_classes).
  """
  image_shape = (batch_size,) + config.input_shape
  images = tf.random_uniform(image_shape)
  labels = tf.random_uniform(
      [batch_size], minval=0, maxval=config.n_classes, dtype=tf.int32)
  return images, labels
  def test_get_expected_feature_map_shapes_with_embedded_ssd_mobilenet_v1(
      self, use_keras):
    """Feature maps for the embedded SSD MobileNet V1 layout have known shapes."""
    input_shapes = (
        ('Conv2d_11_pointwise', [4, 16, 16, 512]),
        ('Conv2d_13_pointwise', [4, 8, 8, 1024]),
    )
    image_features = {
        name: tf.random_uniform(shape, dtype=tf.float32)
        for name, shape in input_shapes
    }

    feature_map_generator = self._build_feature_map_generator(
        feature_map_layout=EMBEDDED_SSD_MOBILENET_V1_LAYOUT,
        use_keras=use_keras
    )
    feature_maps = feature_map_generator(image_features)

    expected_feature_map_shapes = {
        'Conv2d_11_pointwise': (4, 16, 16, 512),
        'Conv2d_13_pointwise': (4, 8, 8, 1024),
        'Conv2d_13_pointwise_2_Conv2d_2_3x3_s2_512': (4, 4, 4, 512),
        'Conv2d_13_pointwise_2_Conv2d_3_3x3_s2_256': (4, 2, 2, 256),
        'Conv2d_13_pointwise_2_Conv2d_4_2x2_s2_256': (4, 1, 1, 256),
    }

    init_op = tf.global_variables_initializer()
    with self.test_session() as sess:
      sess.run(init_op)
      evaluated = sess.run(feature_maps)
      actual_shapes = {name: fmap.shape for name, fmap in evaluated.items()}
      self.assertDictEqual(expected_feature_map_shapes, actual_shapes)
    def _setup_variables(self):
        """Create the weight and bias variables for every layer.

        Layer `i` (0-based) maps `self._shape[i]` units to `self._shape[i + 1]`
        units. Variables are stored on `self` under the names produced by
        `self._weights_str` / `self._bias_str`; the final (output) layer's
        names carry an extra "_out" suffix.

        Each weight matrix is drawn uniformly from [-bound, bound] with
        `bound = 4 * sqrt(6 / (fan_in + fan_out))`, computed from that
        layer's own shape. Biases start at zero.
        """
        with tf.name_scope("autoencoder_variables"):
            n_hidden = self._num_hidden_layers
            # Hidden layers first, then the output layer (index n_hidden).
            for i in range(n_hidden + 1):
                suffix = "_out" if i == n_hidden else ""

                name_w = self._weights_str.format(i + 1) + suffix
                w_shape = (self._shape[i], self._shape[i + 1])
                # Computed per layer in plain Python: the output layer no
                # longer reuses a bound left over from the last hidden layer,
                # and no TensorFlow op is needed for the scalar.
                initializer_bound = 4.0 * (6.0 / (w_shape[0] + w_shape[1])) ** 0.5
                w_init = tf.random_uniform(w_shape, -initializer_bound, initializer_bound)
                self[name_w] = tf.Variable(w_init, name=name_w, trainable=True)

                name_b = self._bias_str.format(i + 1) + suffix
                b_shape = (self._shape[i + 1], )
                b_init = tf.zeros(b_shape)
                self[name_b] = tf.Variable(b_init, name=name_b, trainable=True)
                print(w_shape, b_shape)

            print(self._variables.keys())
Example #15
0
    def __init__(self, config):
        """Build the GRU classifier graph: inputs, loss, accuracy, summaries.

        Args:
            config: object exposing `batch_size`, `max_seq_len`, `output_dim`,
                `hidden_state_dim`, `vocab_size` and `embedding_dim`.
        """
        self.config = config
        batch_size = self.config.batch_size

        # Inputs: token ids and integer class labels (plus a one-hot view).
        self.input = tf.placeholder('int32', [batch_size, config.max_seq_len], name='input')
        self.labels = tf.placeholder('int64', [batch_size], name='labels')
        self.labels_one_hot = tf.one_hot(
            indices=self.labels,
            depth=config.output_dim,
            on_value=1.0,
            off_value=0.0,
            axis=-1)

        self.gru = GRUCell(config.hidden_state_dim)

        # Embed the tokens and split the result into one tensor per time step.
        embedding_init = tf.random_uniform([config.vocab_size, config.embedding_dim], -1.0, 1.0)
        embeddings_we = tf.get_variable('word_embeddings', initializer=embedding_init)
        embed_input = tf.nn.embedding_lookup(embeddings_we, self.input)
        self.emb = embed_input
        steps = tf.split(1, config.max_seq_len, embed_input)
        inputs = [tf.squeeze(step, squeeze_dims=[1]) for step in steps]

        outputs, last_slu_state = tf.nn.rnn(
            cell=self.gru,
            inputs=inputs,
            dtype=tf.float32,)

        # Project the final recurrent state onto the label space.
        projection_init = tf.random_uniform([config.hidden_state_dim, config.output_dim], -1.0, 1.0)
        w_project = tf.get_variable('project2labels', initializer=projection_init)
        logits_bo = tf.matmul(last_slu_state, w_project)
        self.logits = logits_bo
        tf.histogram_summary('logits', logits_bo)
        self.probabilities = tf.nn.softmax(logits_bo)
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits_bo, self.labels_one_hot)
        self.loss = tf.reduce_mean(cross_entropy)
        self.predict = tf.nn.softmax(logits_bo)

        # TensorBoard
        hits = tf.equal(tf.argmax(self.predict, 1), self.labels)
        self.accuracy = tf.reduce_mean(tf.cast(hits, 'float32'), name='accuracy')
        tf.scalar_summary('CCE loss', self.loss)
        tf.scalar_summary('Accuracy', self.accuracy)
        self.tb_info = tf.merge_all_summaries()
Example #16
0
    def __init__(self, rnn_size, rnn_layer, batch_size, input_embedding_size, dim_image, dim_hidden, max_words_q, vocabulary_size, drop_out_rate):
        """Store the hyper-parameters and create the embedding variables and RNN.

        Args:
            rnn_size: number of units in each LSTM cell.
            rnn_layer: number of stacked LSTM layers.
            batch_size: stored on the instance; not used in this constructor.
            input_embedding_size: width of the question word embedding.
            dim_image: size of the image feature vector.
            dim_hidden: size of the shared multimodal embedding.
            max_words_q: stored on the instance; not used in this constructor.
            vocabulary_size: number of rows in the question embedding table.
            drop_out_rate: dropout rate; the LSTM keeps `1 - drop_out_rate`
                of its outputs.
        """
        # Indentation uses spaces only: the previous mix of tabs and spaces
        # is rejected by Python 3 (TabError).
        self.rnn_size = rnn_size
        self.rnn_layer = rnn_layer
        self.batch_size = batch_size
        self.input_embedding_size = input_embedding_size
        self.dim_image = dim_image
        self.dim_hidden = dim_hidden
        self.max_words_q = max_words_q
        self.vocabulary_size = vocabulary_size
        self.drop_out_rate = drop_out_rate

        # Network definitions
        # question-embedding
        self.embed_ques_W = tf.Variable(tf.random_uniform([self.vocabulary_size, self.input_embedding_size], -0.08, 0.08), name='embed_ques_W')

        # encoder: RNN body (the same dropout-wrapped cell is repeated for
        # every layer of the stack)
        self.lstm = rnn_cell.BasicLSTMCell(rnn_size)
        self.lstm_dropout = rnn_cell.DropoutWrapper(self.lstm, output_keep_prob = 1 - self.drop_out_rate)
        self.stacked_lstm = rnn_cell.MultiRNNCell([self.lstm_dropout] * self.rnn_layer)

        # MULTIMODAL
        # state-embedding
        self.embed_state_W = tf.Variable(tf.random_uniform([2*rnn_size*rnn_layer, self.dim_hidden], -0.08,0.08),name='embed_state_W')
        # image-embedding
        self.embed_image_W = tf.Variable(tf.random_uniform([dim_image, self.dim_hidden], -0.08, 0.08), name='embed_image_W')
        # score-embedding
        # NOTE(review): `num_output` is not a parameter of this constructor; it
        # must be defined in an enclosing (presumably module) scope - confirm.
        self.embed_scor_W = tf.Variable(tf.random_uniform([dim_hidden, num_output], -0.08, 0.08), name='embed_scor_W')
Example #17
0
  def input_fn(params):
    """Build a dataset that repeats one batch of random user/item ids.

    Reads `is_training` from the enclosing scope. A training batch is a
    `(features, labels)` pair; an evaluation batch is a features dict that
    also carries a random duplicate mask.

    Args:
      params: dict with "batch_size", "eval_batch_size", "num_users" and
        "num_items".

    Returns:
      A `tf.data.Dataset` repeating the batch SYNTHETIC_BATCHES_PER_EPOCH
      times.
    """
    if is_training:
      batch_size = params["batch_size"]
    else:
      # Fall back to the training batch size when no eval size is set.
      batch_size = params["eval_batch_size"] or params["batch_size"]
    num_users = params["num_users"]
    num_items = params["num_items"]

    def _random_ints(upper):
      # int32 vector of length batch_size drawn uniformly from [0, upper).
      return tf.random_uniform([batch_size], dtype=tf.int32, minval=0,
                               maxval=upper)

    users = _random_ints(num_users)
    items = _random_ints(num_items)
    features = {
        movielens.USER_COLUMN: users,
        movielens.ITEM_COLUMN: items,
    }

    if is_training:
      labels = _random_ints(2)
      data = features, labels
    else:
      features[rconst.DUPLICATE_MASK] = tf.cast(_random_ints(2), tf.bool)
      data = features

    dataset = tf.data.Dataset.from_tensors(data)
    dataset = dataset.repeat(SYNTHETIC_BATCHES_PER_EPOCH)
    return dataset.prefetch(32)
def dae(x, hparams, name):
  """Discretize `x` through a dense "mask" layer followed by a softmax.

  When `hparams.softmax_k > 0` the result of `top_k_softmax` is returned
  directly. Otherwise a noisy, temperature-scaled softmax sample is produced;
  outside training mode the returned sample is the hard one-hot argmax.

  Args:
    x: input tensor for the dense layer.
    hparams: object exposing `v_size`, `softmax_k`, `kl_warmup_steps` and
      `mode`.
    name: variable scope name.

  Returns:
    Tuple `(softmax, sample, loss)`.
  """
  with tf.variable_scope(name):
    m = tf.layers.dense(x, hparams.v_size, name="mask")
    if hparams.softmax_k > 0:
      m, kl = top_k_softmax(m, hparams.softmax_k)
      return m, m, 1.0 - tf.reduce_mean(kl)

    log_probs = tf.nn.log_softmax(m)

    # Gumbel-softmax sample; the noise is scaled by a warm-up schedule.
    noise = gumbel_sample(common_layers.shape_list(m))
    warmup_steps = hparams.kl_warmup_steps
    noise *= common_layers.inverse_exp_decay(warmup_steps // 5) * 0.5
    scheduled_temperature = 1.2 - common_layers.inverse_lin_decay(warmup_steps)
    # 10% of the time keep reasonably high temperature to keep learning.
    use_schedule = tf.less(tf.random_uniform([]), 0.9)
    temperature = tf.cond(
        use_schedule,
        lambda: scheduled_temperature,
        lambda: tf.random_uniform([], minval=0.5, maxval=1.0))
    soft_sample = tf.nn.softmax((log_probs + noise) / temperature)

    m = tf.nn.softmax(m)
    kl = - tf.reduce_max(log_probs, axis=-1)
    if _DO_SUMMARIES:
      tf.summary.histogram("max-log", tf.reshape(kl, [-1]))

    # Hard assignment: one-hot vector of the argmax, with gradients stopped.
    winners = tf.reshape(tf.argmax(m, axis=-1), [-1])
    winners_hot = tf.stop_gradient(tf.one_hot(winners, hparams.v_size))

    # Add losses that prevent too few being used.
    selected = tf.reshape(log_probs, [-1, hparams.v_size]) * winners_hot
    selected_mean = tf.reduce_mean(selected, axis=[0], keep_dims=True)
    selected_var = tf.reduce_mean(tf.square(selected - selected_mean), axis=[0])
    d_dev = - tf.reduce_mean(selected_var)

    if hparams.mode != tf.contrib.learn.ModeKeys.TRAIN:
      # Outside training, return the hard one-hot vectors instead.
      ret = tf.reshape(winners_hot, common_layers.shape_list(soft_sample))
    else:
      ret = soft_sample
    return m, ret, d_dev * 5.0 + tf.reduce_mean(kl) * 0.002
Example #19
0
def init_var_map(init_vars, init_path=None):
    """Create TensorFlow variables from a list of initialisation specs.

    Args:
        init_vars: iterable of `(var_name, var_shape, init_method, dtype)`.
            `init_method` is one of 'zero', 'one', 'normal', 'tnormal',
            'uniform', 'xavier', a number (constant fill), or a key of the
            pickled variable map loaded from `init_path`.
        init_path: optional path to a pickled dict of pre-trained arrays.

    Returns:
        Dict mapping each variable name to its `tf.Variable`. Specs with an
        unknown init method, or a loaded array whose shape does not match,
        are reported with a 'BadParam' message and left out.
    """
    # Start from an empty map so unknown init methods are reported instead of
    # raising NameError when no init_path is given.
    load_var_map = {}
    if init_path is not None:
        # NOTE(security): unpickling can execute arbitrary code; only load
        # files from trusted sources.
        with open(init_path, 'rb') as init_file:
            load_var_map = pkl.load(init_file)
        print('load variable map from', init_path, load_var_map.keys())

    var_map = {}
    for var_name, var_shape, init_method, dtype in init_vars:
        if init_method == 'zero':
            init_value = tf.zeros(var_shape, dtype=dtype)
        elif init_method == 'one':
            init_value = tf.ones(var_shape, dtype=dtype)
        elif init_method == 'normal':
            init_value = tf.random_normal(var_shape, mean=0.0, stddev=STDDEV, dtype=dtype)
        elif init_method == 'tnormal':
            init_value = tf.truncated_normal(var_shape, mean=0.0, stddev=STDDEV, dtype=dtype)
        elif init_method == 'uniform':
            init_value = tf.random_uniform(var_shape, minval=MINVAL, maxval=MAXVAL, dtype=dtype)
        elif init_method == 'xavier':
            # Symmetric range derived from the sum of all dimensions.
            maxval = np.sqrt(6. / np.sum(var_shape))
            init_value = tf.random_uniform(var_shape, minval=-maxval, maxval=maxval, dtype=dtype)
        elif isinstance(init_method, (int, float)):
            init_value = tf.ones(var_shape, dtype=dtype) * init_method
        elif init_method in load_var_map:
            loaded = load_var_map[init_method]
            if loaded.shape != tuple(var_shape):
                print('BadParam: init method', init_method, 'shape', var_shape, loaded.shape)
                continue
            init_value = loaded
        else:
            print('BadParam: init method', init_method)
            continue
        var_map[var_name] = tf.Variable(init_value, name=var_name, dtype=dtype)
    return var_map
    def __init__(self, dim_image, dim_embed, dim_hidden, batch_size, n_lstm_steps, n_words, bias_init_vector=None):
        """Store integer hyper-parameters and create the model's variables.

        Args:
            dim_image: size of the image feature vector.
            dim_embed: width of the word embedding.
            dim_hidden: number of units in the LSTM cell.
            batch_size: stored on the instance; not used in this constructor.
            n_lstm_steps: stored on the instance; not used in this constructor.
            n_words: vocabulary size.
            bias_init_vector: optional array used to initialise
                `embed_word_b`; `self.init_bias` is used when it is None.
        """
        # Builtin int replaces the np.int alias, which newer NumPy releases
        # no longer provide; the conversion itself is unchanged.
        self.dim_image = int(dim_image)
        self.dim_embed = int(dim_embed)
        self.dim_hidden = int(dim_hidden)
        self.batch_size = int(batch_size)
        self.n_lstm_steps = int(n_lstm_steps)
        self.n_words = int(n_words)

        # The word embedding table is placed on the CPU.
        with tf.device("/cpu:0"):
            self.Wemb = tf.Variable(tf.random_uniform([n_words, dim_embed], -0.1, 0.1), name='Wemb')

        self.bemb = self.init_bias(dim_embed, name='bemb')

        self.lstm = rnn_cell.BasicLSTMCell(dim_hidden)

        # Projection of image features into the hidden space.
        self.encode_img_W = tf.Variable(tf.random_uniform([dim_image, dim_hidden], -0.1, 0.1), name='encode_img_W')
        self.encode_img_b = self.init_bias(dim_hidden, name='encode_img_b')

        # Projection of hidden states onto the vocabulary.
        self.embed_word_W = tf.Variable(tf.random_uniform([dim_hidden, n_words], -0.1, 0.1), name='embed_word_W')

        if bias_init_vector is not None:
            self.embed_word_b = tf.Variable(bias_init_vector.astype(np.float32), name='embed_word_b')
        else:
            self.embed_word_b = self.init_bias(n_words, name='embed_word_b')
  def test_get_expected_feature_map_shapes_with_embedded_ssd_mobilenet_v1(
      self):
    """Feature maps for the embedded SSD MobileNet V1 layout have known shapes."""
    input_shapes = (
        ('Conv2d_11_pointwise', [4, 16, 16, 512]),
        ('Conv2d_13_pointwise', [4, 8, 8, 1024]),
    )
    image_features = {
        name: tf.random_uniform(shape, dtype=tf.float32)
        for name, shape in input_shapes
    }

    feature_maps = feature_map_generators.multi_resolution_feature_maps(
        feature_map_layout=EMBEDDED_SSD_MOBILENET_V1_LAYOUT,
        depth_multiplier=1,
        min_depth=32,
        insert_1x1_conv=True,
        image_features=image_features)

    expected_feature_map_shapes = {
        'Conv2d_11_pointwise': (4, 16, 16, 512),
        'Conv2d_13_pointwise': (4, 8, 8, 1024),
        'Conv2d_13_pointwise_2_Conv2d_2_3x3_s2_512': (4, 4, 4, 512),
        'Conv2d_13_pointwise_2_Conv2d_3_3x3_s2_256': (4, 2, 2, 256),
        'Conv2d_13_pointwise_2_Conv2d_4_2x2_s2_256': (4, 1, 1, 256),
    }

    init_op = tf.global_variables_initializer()
    with self.test_session() as sess:
      sess.run(init_op)
      evaluated = sess.run(feature_maps)
      actual_shapes = {name: fmap.shape for name, fmap in evaluated.items()}
      self.assertDictEqual(actual_shapes, expected_feature_map_shapes)
def initialize_mod_binary_MERA(phys_dim,
                               chi,
                               dtype=tf.float64):
    """Create randomly initialised tensors for a modified binary MERA.

    Parameters:
    -------------------
    phys_dim:         int
                      Hilbert space dimension of the bottom layer
    chi:              int
                      maximum bond dimension
    dtype:            tensorflow dtype
                      dtype of the MERA tensors
    Returns:
    -------------------
    (wC, vC, uC, rhoAB, rhoBA)
    wC, vC, uC:      list of tf.Tensor
    rhoAB, rhoBA:    tf.Tensor
    """
    # Single-layer seeds: two rank-3 tensors and one rank-4 tensor.
    rank3_shape = [phys_dim, phys_dim, phys_dim]
    rank4_shape = [phys_dim, phys_dim, phys_dim, phys_dim]
    w_seed = [tf.random_uniform(shape=rank3_shape, dtype=dtype)]
    v_seed = [tf.random_uniform(shape=rank3_shape, dtype=dtype)]
    u_seed = [tf.random_uniform(shape=rank4_shape, dtype=dtype)]

    # Layers are added until the requested bond dimension is reached.
    wC, vC, uC = increase_bond_dimension_by_adding_layers(
        chi_new=chi, wC=w_seed, vC=v_seed, uC=u_seed)

    chi_top = wC[-1].shape[2]
    top_shape = (chi_top, chi_top, chi_top, chi_top)

    def _reshaped_identity():
        # Identity matrix of size chi_top**2 viewed as a rank-4 tensor.
        return tf.reshape(tf.eye(chi_top * chi_top, dtype=dtype), top_shape)

    rhoAB = _reshaped_identity()
    rhoBA = _reshaped_identity()

    return wC, vC, uC, rhoAB, rhoBA
Example #23
0
 def testUnit4(self):
   """revnet.unit for block 4 (depth 416, stride 2) yields [1, 10, 10, 1664]."""
   x1 = tf.random_uniform([1, 19, 19, 1024])
   x2 = tf.random_uniform([1, 19, 19, 1024])
   x1, x2 = revnet.unit(x1, x2, block_num=4, depth=416,
                        num_layers=1, stride=2)
   # assertEqual replaces the deprecated assertEquals alias.
   self.assertEqual(x1.get_shape().as_list(), [1, 10, 10, 1664])
   self.assertEqual(x2.get_shape().as_list(), [1, 10, 10, 1664])
Example #24
0
 def testUnit1(self):
   """revnet.unit for block 1 (depth 64) keeps the [4, 74, 74, 256] shape."""
   x1 = tf.random_uniform([4, 74, 74, 256])
   x2 = tf.random_uniform([4, 74, 74, 256])
   x1, x2 = revnet.unit(x1, x2, block_num=1, depth=64,
                        first_batch_norm=True, num_layers=1)
   # assertEqual replaces the deprecated assertEquals alias.
   self.assertEqual(x1.get_shape().as_list(), [4, 74, 74, 256])
   self.assertEqual(x2.get_shape().as_list(), [4, 74, 74, 256])
Example #25
0
 def testUnit3(self):
   """revnet.unit for block 3 (depth 256, stride 2) yields [1, 19, 19, 1024]."""
   x1 = tf.random_uniform([1, 37, 37, 512])
   x2 = tf.random_uniform([1, 37, 37, 512])
   x1, x2 = revnet.unit(x1, x2, block_num=3, depth=256,
                        num_layers=10, stride=2)
   # assertEqual replaces the deprecated assertEquals alias.
   self.assertEqual(x1.get_shape().as_list(), [1, 19, 19, 1024])
   self.assertEqual(x2.get_shape().as_list(), [1, 19, 19, 1024])
  def benchmarkEagerLinearRegression(self):
    """Time eager linear-regression training and report examples per second."""
    num_batches = 200
    batch_size = 64

    true_w = tf.random_uniform([3, 1])
    true_b = tf.random_uniform([1])
    dataset = linear_regression.synthetic_dataset(
        w=true_w,
        b=true_b,
        noise_level=0.01,
        batch_size=batch_size,
        num_batches=num_batches)
    burn_in_dataset = dataset.take(10)

    model = linear_regression.LinearModel()

    with tf.device(device()):
      optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)

      # Run a short fit first so one-off start-up cost is not timed.
      linear_regression.fit(model, burn_in_dataset, optimizer)

      started = time.time()
      linear_regression.fit(model, dataset, optimizer)
      wall_time = time.time() - started

      examples_per_sec = num_batches * batch_size / wall_time
      device_tag = "gpu" if tfe.num_gpus() > 0 else "cpu"
      self.report_benchmark(
          name="eager_train_%s" % device_tag,
          iters=num_batches,
          extras={"examples_per_sec": examples_per_sec},
          wall_time=wall_time)
Example #27
0
def get_online_sequences(sequence_length, batch_size):
    """Gets tensors which constantly produce new random examples.

    Each example is a sequence with two channels: a channel of uniform random
    values in [0, 1) and a marker channel holding exactly one `1` in each
    half of the sequence. The target is the sum of the two random values at
    the marked positions.

    Args:
        sequence_length: total length of the sequences. Must be at least 2 so
            that each half contains at least one position. For odd lengths
            the second half is one step longer than the first.
        batch_size: how many at a time.

    Returns:
        (data, targets): data is `[sequence_length, batch_size, 2]` and targets
            are `[batch_size]`.
    """
    # Sizes of the two halves; together they always cover the full sequence,
    # so the markers line up with `random_data` for odd lengths as well.
    first_half = sequence_length // 2
    second_half = sequence_length - first_half

    # getting the random channel is easy
    random_data = tf.random_uniform([sequence_length, batch_size, 1],
                                    minval=0.0, maxval=1.0)
    # now we need a random marker in each half of the data
    random_index_1 = tf.random_uniform([1, batch_size], minval=0,
                                       maxval=first_half,
                                       dtype=tf.int32)
    random_index_2 = tf.random_uniform([1, batch_size], minval=0,
                                       maxval=second_half,
                                       dtype=tf.int32)
    # one_hot yields [1, batch, half]; concatenating on the last axis and
    # transposing gives [sequence_length, batch, 1], matching random_data.
    markers = tf.concat(axis=2, values=[tf.one_hot(random_index_1, first_half),
                                        tf.one_hot(random_index_2, second_half)])
    markers = tf.transpose(markers)
    targets = tf.reduce_sum(random_data * markers,
                            axis=0)
    # Squeeze only the trailing channel axis: a bare squeeze would also drop
    # the batch axis when batch_size == 1 and return a scalar.
    return (tf.concat(axis=2, values=[random_data, markers]),
            tf.squeeze(targets, axis=[1]))
Example #28
0
 def testUnit3D(self):
   """Checks the static output shapes of `revnet.unit` with `dim='3d'`.

   With `stride=2` the test expects each of the three spatial dimensions to
   go from 74 to 37 and the channel count to go from 256 to 512.
   """
   input_shape = [4, 74, 74, 74, 256]
   expected_shape = [4, 37, 37, 37, 512]
   x1 = tf.random_uniform(input_shape)
   x2 = tf.random_uniform(input_shape)
   x1, x2 = revnet.unit(x1, x2, block_num=5, depth=128,
                        num_layers=1, dim='3d', stride=2)
   # `assertEqual` is the supported spelling; the `assertEquals` alias is
   # deprecated and no longer exists in recent Python releases.
   self.assertEqual(x1.get_shape().as_list(), expected_shape)
   self.assertEqual(x2.get_shape().as_list(), expected_shape)
Example #29
0
 def __init__(self, dh, dq, da, di, max_q, Nq, Na, cell='rnn',trainable_embeddings=True):
     """Stores the size hyper-parameters and creates the model variables.

     Args:
         dh: number of units of the recurrent cell; also the row count of
             the `aemb_w` projection.
         dq: width of the `qemb_w` embedding rows and of the `Wi`/`bi`
             projection output.
         da: stored on the instance; not used in this constructor.
         di: row count of the `Wi` projection.
         max_q: stored on the instance; not used in this constructor.
         Nq: number of rows of the `qemb_w` embedding table
             (presumably the question vocabulary size -- TODO confirm).
         Na: number of columns of `aemb_w` and length of `aemb_b`
             (presumably the number of answers -- TODO confirm).
         cell: recurrent cell type, one of 'rnn', 'lstm' or 'gru'.
         trainable_embeddings: whether `qemb_w` is trainable.

     Raises:
         NotImplementedError: if `cell` is not one of the supported types.
     """
     self.dh = dh
     self.dq = dq
     self.da = da
     self.di = di
     self.max_q = max_q
     self.Nq = Nq
     self.Na = Na
     self.cell = cell
     
     # The embedding table is explicitly pinned to the CPU.
     with tf.device('/cpu:0'):
         self.qemb_W = tf.get_variable('qemb_w',
                                       initializer=tf.random_uniform([self.Nq, self.dq], -0.1, 0.1),
                                       trainable = trainable_embeddings)
     self.aemb_W = tf.get_variable(name='aemb_w',
                                   initializer=tf.random_uniform([self.dh, self.Na], -0.1, 0.1))
     self.aemb_b = tf.get_variable(name='aemb_b',
                                   initializer=tf.zeros([self.Na]))
     self.Wi = tf.get_variable(name='Wi', shape=[self.di, self.dq],
                               initializer=tf.contrib.layers.xavier_initializer())
     self.bi = tf.get_variable(name='bi',
                                   initializer=tf.zeros([self.dq]))
     
     if self.cell == 'rnn':
         # RNNCell is only the abstract base class and cannot be used as a
         # cell itself; BasicRNNCell is the concrete vanilla RNN cell.
         self.recur = tf.nn.rnn_cell.BasicRNNCell(self.dh)
     elif self.cell == 'lstm':
         self.recur = tf.nn.rnn_cell.LSTMCell(self.dh)
     elif self.cell == 'gru':
         self.recur = tf.nn.rnn_cell.GRUCell(self.dh)
     else:
         raise NotImplementedError(
             "unsupported cell type: %r (expected 'rnn', 'lstm' or 'gru')"
             % (self.cell,))
Example #30
0
    def test_horovod_allreduce_error(self):
        """Check that allreduce fails when workers disagree on tensor shape.

        Two mismatches are exercised: tensors of equal rank whose dimensions
        differ per worker, and tensors with the same element count but a
        different number of dimensions on worker 0.
        """
        hvd.init()
        my_rank = hvd.rank()
        world_size = hvd.size()

        # A single worker has nobody to disagree with, so there is nothing
        # to test.
        if world_size == 1:
            return

        with self.test_session() as sess:
            # Case 1: same number of dimensions, but each worker uses a
            # different extent (17 + its own rank).
            tf.set_random_seed(1234)
            per_rank_shape = [17 + my_rank] * 3
            mismatched = tf.random_uniform(per_rank_shape, -1.0, 1.0)
            with self.assertRaises(tf.errors.FailedPreconditionError):
                sess.run(hvd.allreduce(mismatched))

            # Case 2: identical element count, but worker 0 flattens the
            # last two dimensions into one.
            tf.set_random_seed(1234)
            per_rank_shape = [17, 23 * 57] if my_rank == 0 else [17, 23, 57]
            mismatched = tf.random_uniform(per_rank_shape, -1.0, 1.0)
            with self.assertRaises(tf.errors.FailedPreconditionError):
                sess.run(hvd.allreduce(mismatched))
import tensorflow as tf

# Minimal demo: build a random tensor, wrap it in a variable, initialize the
# variable and print its value.
# NOTE(review): the session is never closed in the visible part of the script.
sess = tf.Session()
# 4x4 tensor of uniform random values drawn from [0, 1).
my_tensor = tf.random_uniform((4, 4), 0, 1)
# Prints the tensor object itself, not evaluated numbers (no sess.run here).
print(my_tensor)

# Variable whose initial value is taken from the random tensor above.
my_var = tf.Variable(initial_value=my_tensor)
print(my_var)

# Left disabled: this would read the variable before the initializer below
# has been run -- presumably kept to illustrate that this fails.
#sess.run(my_var)

# Run the initializer op first, then the variable can be evaluated.
init = tf.global_variables_initializer()
sess.run(init)
print(sess.run(my_var))

# Float placeholder with an unspecified first dimension and 5 columns;
# it is not used in the visible part of the script.
ph = tf.placeholder(tf.float32, shape=(None, 5))
Example #32
0
import tensorflow as tf
import numpy as np

# Load the training table; unpack=True transposes it so that each row of
# `xy` is one column of the text file.
xy = np.loadtxt('logisticTrain.txt', unpack=True, dtype='float32')

# Every row except the last is an input feature; the last row is the label.
x_data = xy[0:-1]
y_data = xy[-1]

# Placeholders for the inputs and labels (shapes left unspecified).
X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)

# One weight per feature row, initialized uniformly in [-1, 1).
W = tf.Variable(tf.random_uniform([1, len(x_data)], -1.0, 1.0))

# Our hypothesis: the logistic sigmoid 1 / (1 + exp(-W.X))
h = tf.matmul(W, X)
hypothesis = tf.div(1., 1 + tf.exp(-h))

# Cost function: mean binary cross-entropy between Y and the hypothesis.
# NOTE(review): tf.log yields -inf if the hypothesis saturates to exactly
# 0 or 1 -- confirm whether clipping is needed for this data.
cost = -tf.reduce_mean(Y * tf.log(hypothesis) +
                       (1 - Y) * tf.log(1 - hypothesis))

# Minimize
a = tf.Variable(0.1)  # Learning rate, alpha
optimizer = tf.train.GradientDescentOptimizer(a)
train = optimizer.minimize(cost)

# Before starting, initialize the variables. We will `run` this first.
init = tf.initialize_all_variables()

# Launch the graph.
with tf.Session() as sess:
Example #33
0
def main():
    """Builds the energy-model graph for the selected dataset and runs it.

    Steps, in order:
      1. Create the log directory and TensorBoard logger on Horovod rank 0.
      2. Create the dataset, input placeholders and model for FLAGS.dataset.
      3. For each of FLAGS.num_gpus input splits, build the positive-energy
         term, a `tf.while_loop` sampler starting from X_NOISE, and (when
         FLAGS.train is set) the loss and its gradients.
      4. Create the session, optionally restore a checkpoint on rank 0 and
         broadcast the variables from rank 0.
      5. Call `train` when FLAGS.train is set, then always call `test`.

    All configuration is read from the module-level FLAGS object.
    """
    print("Local rank: ", hvd.local_rank(), hvd.size())

    # Only rank 0 creates the log directory and owns a logger.
    logdir = osp.join(FLAGS.logdir, FLAGS.exp)
    if hvd.rank() == 0:
        if not osp.exists(logdir):
            os.makedirs(logdir)
        logger = TensorBoardOutputFormat(logdir)
    else:
        logger = None

    LABEL = None
    print("Loading data...")
    # Each branch defines channel_num, the X / X_NOISE / LABEL / LABEL_POS
    # placeholders and the model for one dataset.
    # NOTE(review): an unrecognised FLAGS.dataset falls through every branch
    # and leaves X, X_NOISE, LABEL_POS and model unbound.
    if FLAGS.dataset == 'cifar10':
        dataset = Cifar10(augment=FLAGS.augment, rescale=FLAGS.rescale)
        test_dataset = Cifar10(train=False, rescale=FLAGS.rescale)
        channel_num = 3

        X_NOISE = tf.placeholder(shape=(None, 32, 32, 3), dtype=tf.float32)
        X = tf.placeholder(shape=(None, 32, 32, 3), dtype=tf.float32)
        LABEL = tf.placeholder(shape=(None, 10), dtype=tf.float32)
        LABEL_POS = tf.placeholder(shape=(None, 10), dtype=tf.float32)

        # The first model-size flag that is set wins.
        if FLAGS.large_model:
            model = ResNet32Large(
                num_channels=channel_num,
                num_filters=128,
                train=True)
        elif FLAGS.larger_model:
            model = ResNet32Larger(
                num_channels=channel_num,
                num_filters=128)
        elif FLAGS.wider_model:
            model = ResNet32Wider(
                num_channels=channel_num,
                num_filters=192)
        else:
            model = ResNet32(
                num_channels=channel_num,
                num_filters=128)

    elif FLAGS.dataset == 'imagenet':
        dataset = Imagenet(train=True)
        test_dataset = Imagenet(train=False)
        channel_num = 3
        X_NOISE = tf.placeholder(shape=(None, 32, 32, 3), dtype=tf.float32)
        X = tf.placeholder(shape=(None, 32, 32, 3), dtype=tf.float32)
        LABEL = tf.placeholder(shape=(None, 1000), dtype=tf.float32)
        LABEL_POS = tf.placeholder(shape=(None, 1000), dtype=tf.float32)

        model = ResNet32Wider(
            num_channels=channel_num,
            num_filters=256)

    elif FLAGS.dataset == 'imagenetfull':
        # No `dataset` object here: this case uses TFImagenetLoader below.
        channel_num = 3
        X_NOISE = tf.placeholder(shape=(None, 128, 128, 3), dtype=tf.float32)
        X = tf.placeholder(shape=(None, 128, 128, 3), dtype=tf.float32)
        LABEL = tf.placeholder(shape=(None, 1000), dtype=tf.float32)
        LABEL_POS = tf.placeholder(shape=(None, 1000), dtype=tf.float32)

        model = ResNet128(
            num_channels=channel_num,
            num_filters=64)

    elif FLAGS.dataset == 'mnist':
        dataset = Mnist(rescale=FLAGS.rescale)
        test_dataset = dataset
        channel_num = 1
        X_NOISE = tf.placeholder(shape=(None, 28, 28), dtype=tf.float32)
        X = tf.placeholder(shape=(None, 28, 28), dtype=tf.float32)
        LABEL = tf.placeholder(shape=(None, 10), dtype=tf.float32)
        LABEL_POS = tf.placeholder(shape=(None, 10), dtype=tf.float32)

        model = MnistNet(
            num_channels=channel_num,
            num_filters=FLAGS.num_filters)

    elif FLAGS.dataset == 'dsprites':
        dataset = DSprites(
            cond_shape=FLAGS.cond_shape,
            cond_size=FLAGS.cond_size,
            cond_pos=FLAGS.cond_pos,
            cond_rot=FLAGS.cond_rot)
        test_dataset = dataset
        channel_num = 1

        X_NOISE = tf.placeholder(shape=(None, 64, 64), dtype=tf.float32)
        X = tf.placeholder(shape=(None, 64, 64), dtype=tf.float32)

        # The label width depends on the conditioning flags; the first flag
        # that is set decides it.
        if FLAGS.dpos_only:
            LABEL = tf.placeholder(shape=(None, 2), dtype=tf.float32)
            LABEL_POS = tf.placeholder(shape=(None, 2), dtype=tf.float32)
        elif FLAGS.dsize_only:
            LABEL = tf.placeholder(shape=(None, 1), dtype=tf.float32)
            LABEL_POS = tf.placeholder(shape=(None, 1), dtype=tf.float32)
        elif FLAGS.drot_only:
            LABEL = tf.placeholder(shape=(None, 2), dtype=tf.float32)
            LABEL_POS = tf.placeholder(shape=(None, 2), dtype=tf.float32)
        elif FLAGS.cond_size:
            LABEL = tf.placeholder(shape=(None, 1), dtype=tf.float32)
            LABEL_POS = tf.placeholder(shape=(None, 1), dtype=tf.float32)
        elif FLAGS.cond_shape:
            LABEL = tf.placeholder(shape=(None, 3), dtype=tf.float32)
            LABEL_POS = tf.placeholder(shape=(None, 3), dtype=tf.float32)
        elif FLAGS.cond_pos:
            LABEL = tf.placeholder(shape=(None, 2), dtype=tf.float32)
            LABEL_POS = tf.placeholder(shape=(None, 2), dtype=tf.float32)
        elif FLAGS.cond_rot:
            LABEL = tf.placeholder(shape=(None, 2), dtype=tf.float32)
            LABEL_POS = tf.placeholder(shape=(None, 2), dtype=tf.float32)
        else:
            LABEL = tf.placeholder(shape=(None, 3), dtype=tf.float32)
            LABEL_POS = tf.placeholder(shape=(None, 3), dtype=tf.float32)

        model = DspritesNet(
            num_channels=channel_num,
            num_filters=FLAGS.num_filters,
            cond_size=FLAGS.cond_size,
            cond_shape=FLAGS.cond_shape,
            cond_pos=FLAGS.cond_pos,
            cond_rot=FLAGS.cond_rot)

    print("Done loading...")

    if FLAGS.dataset == "imagenetfull":
        # In the case of full imagenet, use custom_tensorflow dataloader
        data_loader = TFImagenetLoader('train', FLAGS.batch_size, hvd.rank(), hvd.size(), rescale=FLAGS.rescale)
    else:
        data_loader = DataLoader(
            dataset,
            batch_size=FLAGS.batch_size,
            num_workers=FLAGS.data_workers,
            drop_last=True,
            shuffle=True)

    # NOTE(review): batch_size is not read again inside this function.
    batch_size = FLAGS.batch_size

    # A single set of model weights is shared by all towers below.
    weights = [model.construct_weights('context_0')]

    Y = tf.placeholder(shape=(None), dtype=tf.int32)

    # Split every input placeholder into FLAGS.num_gpus equal parts,
    # one part per tower.
    X_SPLIT = tf.split(X, FLAGS.num_gpus)
    X_NOISE_SPLIT = tf.split(X_NOISE, FLAGS.num_gpus)
    LABEL_SPLIT = tf.split(LABEL, FLAGS.num_gpus)
    LABEL_POS_SPLIT = tf.split(LABEL_POS, FLAGS.num_gpus)
    LABEL_SPLIT_INIT = list(LABEL_SPLIT)
    tower_grads = []
    tower_gen_grads = []
    x_mod_list = []

    optimizer = AdamOptimizer(FLAGS.lr, beta1=0.0, beta2=0.999)
    optimizer = hvd.DistributedOptimizer(optimizer)

    # Build one tower per input split.
    # NOTE(review): target_vars is re-created inside this loop, so after the
    # loop it only holds the entries of the last tower.
    for j in range(FLAGS.num_gpus):

        if FLAGS.model_cclass:
            # Let the model pick the class: evaluate the energy of every
            # image under each of 10 one-hot labels and sample a label from
            # those energies. The 10 classes and 32x32x3 images are
            # hard-coded in this branch.
            ind_batch_size = FLAGS.batch_size // FLAGS.num_gpus
            # NOTE(review): label_tensor has FLAGS.batch_size * 10 rows while
            # x_split has ind_batch_size * 10 rows; these only agree when
            # FLAGS.num_gpus == 1 -- confirm.
            label_tensor = tf.Variable(
                tf.convert_to_tensor(
                    np.reshape(
                        np.tile(np.eye(10), (FLAGS.batch_size, 1, 1)),
                        (FLAGS.batch_size * 10, 10)),
                    dtype=tf.float32),
                trainable=False,
                dtype=tf.float32)
            # Repeat each image 10 times, once per candidate label.
            x_split = tf.tile(
                tf.reshape(
                    X_SPLIT[j], (ind_batch_size, 1, 32, 32, 3)), (1, 10, 1, 1, 1))
            x_split = tf.reshape(x_split, (ind_batch_size * 10, 32, 32, 3))
            energy_pos = model.forward(
                x_split,
                weights[0],
                label=label_tensor,
                stop_at_grad=False)

            energy_pos_full = tf.reshape(energy_pos, (ind_batch_size, 10))
            energy_partition_est = tf.reduce_logsumexp(
                energy_pos_full, axis=1, keepdims=True)
            # Gumbel-max sampling: argmax of (negative energy + Gumbel noise).
            # energy_partition_est is constant per row, so it does not change
            # the argmax.
            uniform = tf.random_uniform(tf.shape(energy_pos_full))
            label_tensor = tf.argmax(-energy_pos_full -
                                     tf.log(-tf.log(uniform)) - energy_partition_est, axis=1)
            label = tf.one_hot(label_tensor, 10, dtype=tf.float32)
            # Debug print of the sampled labels and the energies.
            label = tf.Print(label, [label_tensor, energy_pos_full])
            # The sampled label replaces this tower's fed label split.
            LABEL_SPLIT[j] = label
            energy_pos = tf.concat(energy_pos, axis=0)
        else:
            # Energy of the real data under its positive labels.
            energy_pos = [
                model.forward(
                    X_SPLIT[j],
                    weights[0],
                    label=LABEL_POS_SPLIT[j],
                    stop_at_grad=False)]
            energy_pos = tf.concat(energy_pos, axis=0)

        print("Building graph...")
        # The sampling chain starts from the fed noise images.
        x_mod = x_orig = X_NOISE_SPLIT[j]

        x_grads = []

        energy_negs = []
        loss_energys = []

        # Energy of the initial samples (exposed later as 'energy_start').
        energy_negs.extend([model.forward(tf.stop_gradient(
            x_mod), weights[0], label=LABEL_SPLIT[j], stop_at_grad=False, reuse=True)])
        eps_begin = tf.zeros(1)

        # Loop state and condition: run FLAGS.num_steps sampling steps.
        steps = tf.constant(0)
        c = lambda i, x: tf.less(i, FLAGS.num_steps)

        def langevin_step(counter, x_mod):
            # Body of the sampling while_loop: perturb the samples with
            # Gaussian noise, step along the negative energy gradient (or
            # call `hmc`), and clip back into [0, FLAGS.rescale].
            x_mod = x_mod + tf.random_normal(tf.shape(x_mod),
                                             mean=0.0,
                                             stddev=0.005 * FLAGS.rescale * FLAGS.noise_scale)

            energy_noise = energy_start = tf.concat(
                [model.forward(
                        x_mod,
                        weights[0],
                        label=LABEL_SPLIT[j],
                        reuse=True,
                        stop_at_grad=False,
                        stop_batch=True)],
                axis=0)

            # Gradient of the temperature-scaled energy w.r.t. the samples
            # (label_grad is computed but not used afterwards).
            x_grad, label_grad = tf.gradients(
                FLAGS.temperature * energy_noise, [x_mod, LABEL_SPLIT[j]])
            energy_noise_old = energy_noise

            lr = FLAGS.step_lr

            # Optionally clip the gradient, either by L2 norm or element-wise.
            if FLAGS.proj_norm != 0.0:
                if FLAGS.proj_norm_type == 'l2':
                    x_grad = tf.clip_by_norm(x_grad, FLAGS.proj_norm)
                elif FLAGS.proj_norm_type == 'li':
                    x_grad = tf.clip_by_value(
                        x_grad, -FLAGS.proj_norm, FLAGS.proj_norm)
                else:
                    print("Other types of projection are not supported!!!")
                    assert False

            # Update rule: HMC when requested, plain gradient step otherwise.
            if FLAGS.hmc:
                # Step size should be tuned to get around 65% acceptance
                def energy(x):
                    return FLAGS.temperature * \
                        model.forward(x, weights[0], label=LABEL_SPLIT[j], reuse=True)

                x_last = hmc(x_mod, 15., 10, energy)
            else:
                x_last = x_mod - (lr) * x_grad

            x_mod = x_last
            # Keep the samples inside the valid range [0, FLAGS.rescale].
            x_mod = tf.clip_by_value(x_mod, 0, FLAGS.rescale)

            counter = counter + 1

            return counter, x_mod

        steps, x_mod = tf.while_loop(c, langevin_step, (steps, x_mod))

        # Energy gradient at the final samples, exposed for inspection.
        energy_eval = model.forward(x_mod, weights[0], label=LABEL_SPLIT[j],
                                    stop_at_grad=False, reuse=True)
        x_grad = tf.gradients(FLAGS.temperature * energy_eval, [x_mod])[0]
        x_grads.append(x_grad)

        # Energy of the final samples; stop_gradient blocks gradients from
        # flowing back through the sampling chain.
        energy_negs.append(
            model.forward(
                tf.stop_gradient(x_mod),
                weights[0],
                label=LABEL_SPLIT[j],
                stop_at_grad=False,
                reuse=True))

        test_x_mod = x_mod

        temp = FLAGS.temperature

        energy_neg = energy_negs[-1]
        # Mean absolute difference between the final samples and real data.
        x_off = tf.reduce_mean(
            tf.abs(x_mod[:tf.shape(X_SPLIT[j])[0]] - X_SPLIT[j]))

        # NOTE(review): this uses the full LABEL placeholder rather than
        # LABEL_SPLIT[j], and the keyword is `stop_grad` whereas the other
        # calls use `stop_at_grad` -- confirm both are intended.
        loss_energy = model.forward(
            x_mod,
            weights[0],
            reuse=True,
            label=LABEL,
            stop_grad=True)

        print("Finished processing loop construction ...")

        # Tensors handed to train() / test(), keyed by name.
        target_vars = {}

        if FLAGS.cclass or FLAGS.model_cclass:
            # Entropy of the empirical label distribution of the first split.
            label_sum = tf.reduce_sum(LABEL_SPLIT[0], axis=0)
            label_prob = label_sum / tf.reduce_sum(label_sum)
            label_ent = -tf.reduce_sum(label_prob *
                                       tf.math.log(label_prob + 1e-7))
        else:
            label_ent = tf.zeros(1)

        target_vars['label_ent'] = label_ent

        if FLAGS.train:

            # NOTE(review): loss_ml is only assigned for the three objectives
            # below; any other FLAGS.objective leaves it unbound and the
            # reduce_mean that follows raises NameError.
            if FLAGS.objective == 'logsumexp':
                pos_term = temp * energy_pos
                # Weight each negative sample by exp(-temp * energy),
                # shifted by the minimum energy; the weights are treated as
                # constants (stop_gradient).
                energy_neg_reduced = (energy_neg - tf.reduce_min(energy_neg))
                coeff = tf.stop_gradient(tf.exp(-temp * energy_neg_reduced))
                norm_constant = tf.stop_gradient(tf.reduce_sum(coeff)) + 1e-4
                pos_loss = tf.reduce_mean(temp * energy_pos)
                neg_loss = coeff * (-1 * temp * energy_neg) / norm_constant
                loss_ml = FLAGS.ml_coeff * (pos_loss + tf.reduce_sum(neg_loss))
            elif FLAGS.objective == 'cd':
                # Difference of the mean positive and mean negative energy.
                pos_loss = tf.reduce_mean(temp * energy_pos)
                neg_loss = -tf.reduce_mean(temp * energy_neg)
                loss_ml = FLAGS.ml_coeff * (pos_loss + tf.reduce_sum(neg_loss))
            elif FLAGS.objective == 'softplus':
                loss_ml = FLAGS.ml_coeff * \
                    tf.nn.softplus(temp * (energy_pos - energy_neg))

            loss_total = tf.reduce_mean(loss_ml)

            if not FLAGS.zero_kl:
                loss_total = loss_total + tf.reduce_mean(loss_energy)

            # L2 penalty on the magnitude of both energies.
            loss_total = loss_total + \
                FLAGS.l2_coeff * (tf.reduce_mean(tf.square(energy_pos)) + tf.reduce_mean(tf.square((energy_neg))))

            print("Started gradient computation...")
            gvs = optimizer.compute_gradients(loss_total)
            # Drop variables that received no gradient.
            gvs = [(k, v) for (k, v) in gvs if k is not None]

            print("Applying gradients...")

            # Gradients are only collected here; they are averaged and
            # applied after the tower loop.
            tower_grads.append(gvs)

            print("Finished applying gradients.")

            target_vars['loss_ml'] = loss_ml
            target_vars['total_loss'] = loss_total
            target_vars['loss_energy'] = loss_energy
            target_vars['weights'] = weights
            target_vars['gvs'] = gvs

        target_vars['X'] = X
        target_vars['Y'] = Y
        target_vars['LABEL'] = LABEL
        target_vars['LABEL_POS'] = LABEL_POS
        target_vars['X_NOISE'] = X_NOISE
        target_vars['energy_pos'] = energy_pos
        target_vars['energy_start'] = energy_negs[0]

        if len(x_grads) >= 1:
            target_vars['x_grad'] = x_grads[-1]
            target_vars['x_grad_first'] = x_grads[0]
        else:
            target_vars['x_grad'] = tf.zeros(1)
            target_vars['x_grad_first'] = tf.zeros(1)

        target_vars['x_mod'] = x_mod
        target_vars['x_off'] = x_off
        target_vars['temp'] = temp
        target_vars['energy_neg'] = energy_neg
        target_vars['test_x_mod'] = test_x_mod
        target_vars['eps_begin'] = eps_begin

    if FLAGS.train:
        # Average the per-tower gradients and build a single training op.
        grads = average_gradients(tower_grads)
        train_op = optimizer.apply_gradients(grads)
        target_vars['train_op'] = train_op

    config = tf.ConfigProto()

    # With several Horovod workers, restrict each process to the GPU that
    # matches its local rank.
    if hvd.size() > 1:
        config.gpu_options.visible_device_list = str(hvd.local_rank())

    sess = tf.Session(config=config)

    saver = loader = tf.train.Saver(
        max_to_keep=30, keep_checkpoint_every_n_hours=6)

    # Count the trainable parameters for the log line below.
    total_parameters = 0
    for variable in tf.trainable_variables():
        # shape is an array of tf.Dimension
        shape = variable.get_shape()
        variable_parameters = 1
        for dim in shape:
            variable_parameters *= dim.value
        total_parameters += variable_parameters
    print("Model has a total of {} parameters".format(total_parameters))

    sess.run(tf.global_variables_initializer())

    resume_itr = 0

    # Only rank 0 restores from disk; the broadcast below copies its
    # variables to the other workers.
    # NOTE(review): resume_itr is only updated on rank 0, so other ranks pass
    # 0 to train() -- confirm this is intended.
    if (FLAGS.resume_iter != -1 or not FLAGS.train) and hvd.rank() == 0:
        model_file = osp.join(logdir, 'model_{}'.format(FLAGS.resume_iter))
        resume_itr = FLAGS.resume_iter
        # saver.restore(sess, model_file)
        optimistic_restore(sess, model_file)

    sess.run(hvd.broadcast_global_variables(0))
    # The messages below are printed after initialization and broadcast have
    # already run.
    print("Initializing variables...")

    print("Start broadcast")
    print("End broadcast")

    if FLAGS.train:
        train(target_vars, saver, sess,
              logger, data_loader, resume_itr,
              logdir)

    # test() runs unconditionally, also after training.
    test(target_vars, saver, sess, logger, data_loader)
    def build_graph(self):
        """Build the skip-gram training graph and store its handles on self.

        Reads the vocabulary through `word2vec.skipgram`, fills the
        vocabulary fields of the options object and the word-to-id map,
        creates the two embedding matrices and the global step, and wires up
        the `word2vec.neg_train` op with a linearly decaying learning rate.
        """
        options = self._options

        # Reader op over the training text file.
        (vocab_words, vocab_counts, epoch_size, epoch_counter, words_seen,
         example_ids, label_ids) = word2vec.skipgram(
             filename=options.train_data,
             batch_size=options.batch_size,
             window_size=options.window_size,
             min_count=options.min_count,
             subsample=options.subsample)

        # Evaluate the vocabulary once and cache it on the options object.
        (options.vocab_words, options.vocab_counts,
         options.words_per_epoch) = self._session.run(
             [vocab_words, vocab_counts, epoch_size])
        options.vocab_size = len(options.vocab_words)
        print("Data file: ", options.train_data)
        print("Vocab size: ", options.vocab_size - 1, " + UNK")
        print("Words per epoch: ", options.words_per_epoch)

        # id -> word is the vocabulary itself; word -> id is its inverse.
        self._id2word = options.vocab_words
        for word_id, word in enumerate(self._id2word):
            self._word2id[word] = word_id

        # Input embeddings, [vocab_size, emb_dim], initialized uniformly in
        # [-0.5 / emb_dim, 0.5 / emb_dim).
        w_in = tf.Variable(
            tf.random_uniform(
                [options.vocab_size, options.emb_dim],
                -0.5 / options.emb_dim,
                0.5 / options.emb_dim),
            name="w_in")

        # Output embeddings, [vocab_size, emb_dim], initialized to zero.
        w_out = tf.Variable(
            tf.zeros([options.vocab_size, options.emb_dim]),
            name="w_out")

        # Scalar step counter.
        global_step = tf.Variable(0, name="global_step")

        # Learning rate decays linearly with the number of processed words
        # and never drops below 0.0001 of the base rate.
        words_to_train = float(
            options.words_per_epoch * options.epochs_to_train)
        remaining = 1.0 - tf.cast(words_seen, tf.float32) / words_to_train
        lr = options.learning_rate * tf.maximum(0.0001, remaining)

        # The training op is created under a control dependency on the
        # step increment.
        bump_step = global_step.assign_add(1)
        with tf.control_dependencies([bump_step]):
            train_op = word2vec.neg_train(
                w_in,
                w_out,
                example_ids,
                label_ids,
                lr,
                vocab_count=options.vocab_counts.tolist(),
                num_negative_samples=options.num_samples)

        self._w_in = w_in
        self._examples = example_ids
        self._labels = label_ids
        self._lr = lr
        self._train = train_op
        self.step = global_step
        self._epoch = epoch_counter
        self._words = words_seen
Example #35
0
def mul_temperature(logits_BxN, temperature):
    """Perturb logits with Gumbel noise scaled by `temperature`.

    Args:
        logits_BxN: logits tensor; the noise is drawn with the same shape.
        temperature: multiplier applied to the noise before it is added.

    Returns:
        `logits_BxN` plus `-log(-log(U)) * temperature`, with `U` drawn
        uniformly from [0, 1) per element.
    """
    uniform_noise_BxN = tf.random_uniform(tf.shape(logits_BxN))
    # -log(-log(U)) turns uniform samples into standard Gumbel samples.
    gumbel_noise_BxN = -tf.log(-tf.log(uniform_noise_BxN))
    logits_BxN += gumbel_noise_BxN * temperature
    return logits_BxN
Example #36
0
def inference(batch_placeholders, similarity_placeholder, init_word_embeds,
              word_to_num, num_to_word):
    """Build the scores of one positive and three negative answers.

    The batch is split along axis 1 into a question, a positive answer and
    three negative answers. Each is encoded with a shared LSTM, and every
    answer is scored against the question by a bilinear tensor term (W) plus
    a linear term on the concatenated pair (V) and a bias (b), passed
    through ReLU and projected to a scalar with U.

    Args:
        batch_placeholders: tensor holding the question and answers; split
            into `params.corrupt_size + 2` parts along axis 1 (exactly five
            parts are unpacked).
        similarity_placeholder: not used by this function.
        init_word_embeds: initial value of the word embedding matrix.
        word_to_num: not used by this function.
        num_to_word: not used by this function.

    Returns:
        A list `[score_pos, score_neg1, score_neg2, score_neg3]`.
    """
    print("Begin inference:")
    print("Creating variables")

    E = tf.Variable(init_word_embeds, dtype=tf.float32)
    # NOTE(review): name='W' is given to the random_uniform op, not to the
    # Variable itself (unlike `b` below) -- confirm whether that is intended.
    W = tf.Variable(
        tf.random_uniform(
            [params.lstm_size, params.lstm_size, params.slice_size],
            minval=-1.0 / params.lstm_size,
            maxval=1.0 / params.lstm_size,
            name='W'))
    V = tf.Variable(
        tf.random_uniform([params.slice_size, 2 * params.lstm_size],
                          minval=-1.0 / (2 * params.lstm_size),
                          maxval=1.0 / (2 * params.lstm_size)))
    b = tf.Variable(tf.zeros([1, params.slice_size]), name='b')
    U = tf.Variable(
        tf.random_uniform([1, params.slice_size],
                          minval=-1.0 / params.slice_size,
                          maxval=1.0 / params.slice_size))
    lstm = createLSTM(params.lstm_size)
    print("Calcing sentences2vec")

    # Separate the question and the four candidate answers, dropping the
    # size-1 axis left behind by the split.
    question_vec, pos_answer_vec, neg1, neg2, neg3 = tf.split(
        1, params.corrupt_size + 2, batch_placeholders)
    question_vec = tf.squeeze(question_vec)
    pos_answer_vec = tf.squeeze(pos_answer_vec)
    neg1 = tf.squeeze(neg1)
    neg2 = tf.squeeze(neg2)
    neg3 = tf.squeeze(neg3)

    # Encode every sentence with the same LSTM; the boolean flag is False
    # for the first call and True for the later ones.
    question_vec = train_sentence2vectorLSTM(lstm, E, question_vec, False)
    pos_answer_vec = train_sentence2vectorLSTM(lstm, E, pos_answer_vec, True)
    neg1 = train_sentence2vectorLSTM(lstm, E, neg1, True)
    neg2 = train_sentence2vectorLSTM(lstm, E, neg2, True)
    neg3 = train_sentence2vectorLSTM(lstm, E, neg3, True)

    def score_answer(answer_vec):
        # Bilinear term: one value per slice k of W, question^T W[k] answer.
        slice_scores = [
            tf.reduce_sum(
                answer_vec * tf.matmul(question_vec, W[:, :, k]), 1)
            for k in range(params.slice_size)
        ]
        bilinear = tf.pack(slice_scores)
        # Linear term on the concatenated (question, answer) pair.
        pair = tf.concat(1, [question_vec, answer_vec])
        linear = tf.transpose(tf.matmul(V, tf.transpose(pair)))
        hidden = tf.nn.relu(tf.transpose(bilinear) + linear + b)
        return tf.matmul(hidden, tf.transpose(U))

    score_pos = score_answer(pos_answer_vec)
    negative = [score_answer(candidate) for candidate in (neg1, neg2, neg3)]

    return [score_pos, negative[0], negative[1], negative[2]]
Example #37
0
def adjective_embeddings(data_file, embeddings_file_name, num_steps, embedding_dim):
    """Train word embeddings with an NCE loss and write them to a text file.

    Args:
        data_file: Path to a pickle file holding the 4-tuple
            ``(dictionary, reverse_dictionary, read_data, read_label)``.
        embeddings_file_name: Path of the text file that receives the result.
        num_steps: Number of optimisation steps to run.
        embedding_dim: Dimension of every embedding vector.

    The output file starts with a ``"<rows> <dim>"`` header, followed by one
    line per vocabulary index holding the word and the components of its
    L2-normalised vector rounded to 6 decimals.

    Relies on a module-level ``vocabulary_size``.
    """
    # Training configuration.
    batch_size = 64  # Size of mini-batch for skip-gram model.
    embedding_size = embedding_dim  # Dimension of the embedding vector.
    num_sampled = 200  # Sample size for negative examples.
    logs_path = './log/'
    learning_rate_ = 0.01
    # Evaluation sample configuration.
    sample_size = 20  # Random sample of words to evaluate similarity.
    sample_window = 20  # Only pick samples in the head of the distribution.
    # Draw `sample_size` distinct word ids out of the first `sample_window` ids.
    sample_examples = np.random.choice(sample_window, sample_size, replace=False)

    # NOTE(review): pickle.load can execute arbitrary code; only feed this
    # function data files from a trusted source.
    with open(data_file, 'rb') as f:
        dictionary, reverse_dictionary, read_data, read_label = pickle.load(f)
    # Debug dump of the loaded data.
    print("ddddddd", reverse_dictionary)
    print()
    print("rrrrrrrr", read_label)
    print("wwwwwwww", read_data)

    ## Constructing the graph...
    graph = tf.Graph()
    with graph.as_default():
        with tf.device('/cpu:0'):
            # Placeholders to read input data.
            with tf.name_scope('Inputs'):
                train_inputs = tf.placeholder(tf.int32, shape=[batch_size])
                train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])

            # Look up embeddings for inputs.
            with tf.name_scope('Embeddings'):
                sample_dataset = tf.constant(sample_examples, dtype=tf.int32)
                embeddings = tf.Variable(tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
                embed = tf.nn.embedding_lookup(embeddings, train_inputs)

                # Construct the variables for the NCE loss
                nce_weights = tf.Variable(tf.truncated_normal([vocabulary_size, embedding_size],
                                                              stddev=1.0 / math.sqrt(embedding_size)))
                nce_biases = tf.Variable(tf.zeros([vocabulary_size]))

            # Compute the average NCE loss for the batch.
            # tf.nce_loss automatically draws a new sample of the negative labels each
            # time we evaluate the loss.
            with tf.name_scope('Loss'):
                loss = tf.reduce_mean(tf.nn.nce_loss(weights=nce_weights, biases=nce_biases,
                                                     labels=train_labels, inputs=embed,
                                                     num_sampled=num_sampled, num_classes=vocabulary_size))

            # Construct the Adam optimizer using the configured learning rate.
            with tf.name_scope('Adam_Optimizer'):
                optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate_).minimize(loss)

            # Scale every embedding to unit L2 norm so that the dot products
            # below are cosine similarities.
            with tf.name_scope('Normalization'):
                norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))
                normalized_embeddings = embeddings / norm

            sample_embeddings = tf.nn.embedding_lookup(normalized_embeddings, sample_dataset)
            similarity = tf.matmul(sample_embeddings, normalized_embeddings, transpose_b=True)

            # Add variable initializer.
            init = tf.global_variables_initializer()

            # Create a summary to monitor cost tensor
            tf.summary.scalar("cost", loss)
            # Merge all summary variables.
            merged_summary_op = tf.summary.merge_all()

    with tf.Session(graph=graph) as session:
        # We must initialize all variables before we use them.
        session.run(init)
        summary_writer = tf.summary.FileWriter(logs_path, graph=graph)

        print('Initializing the model')
        length = len(read_data)
        average_loss = 0
        try:
            for step in range(num_steps):
                print(step)
                batch_inputs = np.ndarray(shape=(batch_size), dtype=np.int32)
                batch_labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
                # Walk through the data cyclically, wrapping at the end.
                start = step * batch_size % length
                for offset in range(batch_size):
                    position = (start + offset) % length
                    batch_inputs[offset] = read_data[position]
                    batch_labels[offset, 0] = read_label[position]
                feed_dict = {train_inputs: batch_inputs, train_labels: batch_labels}

                # We perform one update step by evaluating the optimizer op using session.run()
                _, loss_val, summary = session.run([optimizer, loss, merged_summary_op], feed_dict=feed_dict)

                summary_writer.add_summary(summary, step)
                average_loss += loss_val

                if step % 5000 == 0 and step > 0:
                    average_loss /= 5000

                    # The average loss is an estimate of the loss over the last 5000 batches.
                    print('Average loss at step ', step, ': ', average_loss)
                    average_loss = 0

                # Evaluate similarity after every 10000 iterations.
                if step % 10000 == 0:
                    sim = similarity.eval()
                    top_k = 10  # Look for top-10 neighbours for words in sample set.
                    for i in range(sample_size):
                        sample_word = reverse_dictionary[sample_examples[i]]
                        # Skip the first hit and keep at most top_k neighbours.
                        nearest = (-sim[i, :]).argsort()[1:top_k + 1]
                        print(top_k)
                        log_str = 'Nearest to %s:' % sample_word
                        # Iterate over what is actually available so a
                        # vocabulary smaller than top_k + 1 does not raise.
                        for neighbour in nearest:
                            print("22222222", neighbour)
                            close_word = reverse_dictionary[neighbour]
                            log_str = '%s %s,' % (log_str, close_word)
                        print(log_str)
                    print()
        finally:
            # Flush pending summary events and release the event file.
            summary_writer.close()
        final_embeddings = normalized_embeddings.eval()

    with open(embeddings_file_name, 'w') as outputfile:
        outputfile.write(str(len(final_embeddings)) + ' ' + str(embedding_size))
        for index, vector in enumerate(final_embeddings):
            components = ' '.join(str(round(value, 6)) for value in vector)
            outputfile.write('\n' + reverse_dictionary[index] + ' ' + components)
Example #38
0
    def train(self):
        """Build the losses and optimisers, then run the adversarial training loop.

        The discriminator loss is ``-mean(D_real) + mean(D_fake)`` plus
        ``self.LAMBDA`` times a gradient penalty evaluated on random
        interpolations between ``self.x`` and ``self.g``; the generator loss is
        ``-mean(D_fake)``.  Each step runs five discriminator updates followed
        by one generator update.  Losses are printed every 100 steps and a
        figure from ``self._display()`` is saved under ``out/`` every 1000
        steps.

        The session is stored on ``self.sess`` and is closed when training
        ends, including when it ends with an exception.
        """
        loss_dis = -tf.reduce_mean(self.D_real) + tf.reduce_mean(self.D_fake)
        loss_gen = -tf.reduce_mean(self.D_fake)

        # Gradient penalty: push the norm of the discriminator gradient at
        # random points between real and generated samples towards 1.
        alpha = tf.random_uniform(shape=[self.batch_size, 1],
                                  minval=0.,
                                  maxval=1.)

        differences = self.g - self.x
        interpolates = self.x + alpha * differences
        gradients = tf.gradients(self._discriminator(interpolates),
                                 [interpolates])[0]
        slopes = tf.sqrt(
            tf.reduce_sum(tf.square(gradients), reduction_indices=[1]))
        gradient_penalty = tf.reduce_mean((slopes - 1.)**2)

        loss_dis += self.LAMBDA * gradient_penalty

        opt_dis = tf.train.AdamOptimizer(learning_rate=self.learning_rate,
                                         beta1=0.5,
                                         beta2=0.9).minimize(
                                             loss_dis,
                                             var_list=self.params_dis)
        opt_gen = tf.train.AdamOptimizer(learning_rate=self.learning_rate,
                                         beta1=0.5,
                                         beta2=0.9).minimize(
                                             loss_gen,
                                             var_list=self.params_gen)

        init = tf.global_variables_initializer()

        disp_step_num = 1000

        if not os.path.exists('out/'):
            os.makedirs('out/')
        fig_i = 0

        self.sess = tf.Session()
        try:
            self.sess.run(init)

            for step in range(self.step_num):
                for _ in range(5):
                    # Use the instance's batch size, matching `alpha` and
                    # `sample_z`; the labels of the batch are not needed.
                    xs, _labels = self.data.train.next_batch(self.batch_size)
                    zs = sample_z(self.batch_size, self.z_shape)
                    _, l_dis = self.sess.run([opt_dis, loss_dis],
                                             feed_dict={
                                                 self.z: zs,
                                                 self.x: xs
                                             })

                zs = sample_z(self.batch_size, self.z_shape)
                _, l_gen = self.sess.run([opt_gen, loss_gen],
                                         feed_dict={self.z: zs})

                if step % 100 == 0:
                    print('Step: {}, loss_dis = {:.5}, loss_gen = {:.5}'.format(
                        step, l_dis, l_gen))
                if step % disp_step_num == 0:
                    fig = self._display()
                    plt.savefig('out/{}.png'.format(str(fig_i).zfill(3)),
                                bbox_inches='tight')
                    fig_i += 1
                    plt.close(fig)
        finally:
            # Release the session even if a training step fails.
            self.sess.close()
Example #39
0
def word2vec(batch_gen):
    """Assemble a word2vec graph with an NCE loss and run its training loop.

    ``batch_gen`` must yield ``(centers, targets)`` pairs through ``.next()``;
    they are fed to the two integer placeholders.  The mean loss over the last
    ``SKIP_STEP`` steps is printed every ``SKIP_STEP`` steps.
    """
    # Inputs: integer ids, as required by the embedding lookup.
    center_ids = tf.placeholder(tf.int32, shape=[BATCH_SIZE], name="x-placeholder")
    target_ids = tf.placeholder(tf.int32, shape=[BATCH_SIZE, 1], name="y-placeholder")

    # Embedding table (vocab size x embed size), uniform in [-1, 1).
    initial_embeddings = tf.random_uniform([VOCAB_SIZE, EMBED_SIZE], -1.0, 1.0)
    embedding_table = tf.Variable(initial_embeddings, name="matrix")

    # Inference: look up the embedding of every input id.
    embedded = tf.nn.embedding_lookup(embedding_table, center_ids, name='embed')

    # NCE parameters: weights (vocab size x embed size) and a zero bias.
    initial_nce_weights = tf.truncated_normal([VOCAB_SIZE, EMBED_SIZE],
                                              stddev=1.0 / (EMBED_SIZE**0.5))
    nce_weights = tf.Variable(initial_nce_weights, name="weight")
    nce_bias = tf.Variable(tf.zeros([VOCAB_SIZE]), name="bias")

    # Per-example NCE loss, averaged across the batch.
    per_example_loss = tf.nn.nce_loss(weights=nce_weights,
                                      biases=nce_bias,
                                      labels=target_ids,
                                      inputs=embedded,
                                      num_sampled=NUM_SAMPLED,
                                      num_classes=VOCAB_SIZE)
    mean_loss = tf.reduce_mean(per_example_loss)

    train_op = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(mean_loss)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Accumulates the loss of the current SKIP_STEP-sized reporting window.
        window_loss = 0.0
        writer = tf.summary.FileWriter('./my_graph/no_frills/', sess.graph)
        for step in xrange(NUM_TRAIN_STEPS):
            centers, targets = batch_gen.next()
            feed = {center_ids: centers, target_ids: targets}
            _, batch_loss = sess.run([train_op, mean_loss], feed_dict=feed)
            window_loss += batch_loss
            if (step + 1) % SKIP_STEP != 0:
                continue
            print('Average loss at step {}: {:5.1f}'.format(
                step, window_loss / SKIP_STEP))
            window_loss = 0.0
        writer.close()
Example #40
0
import gym
import numpy as np
import random
import tensorflow as tf
import matplotlib.pyplot as plt

# Script setup: create the gym environment, build a single-layer linear
# Q-network in the default graph, and define the learning parameters.
env = gym.make('FrozenLake-v0')
tf.reset_default_graph()
# Feed-forward part of the network used to choose actions: a 1x16 input
# multiplied by a 16x4 weight matrix gives one Q-value per column, and
# `predict` is the index of the largest one.
inputs1 = tf.placeholder(shape=[1, 16], dtype=tf.float32)
W = tf.Variable(tf.random_uniform([16, 4], 0, 0.01))
Qout = tf.matmul(inputs1, W)
predict = tf.argmax(Qout, 1)

# The loss is the sum of squared differences between the target Q-values
# (fed through `nextQ`) and the predicted Q-values; it is minimised with
# plain gradient descent.
nextQ = tf.placeholder(shape=[1, 4], dtype=tf.float32)
loss = tf.reduce_sum(tf.square(nextQ - Qout))
trainer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
updateModel = trainer.minimize(loss)
init = tf.global_variables_initializer()

# Set learning parameters
# NOTE(review): `y` and `e` are not used in the visible part of the script;
# presumably the discount factor and the exploration rate -- confirm.
y = .99
e = 0.1
num_episodes = 2000
# Lists to contain total rewards and steps per episode
jList = []
rList = []
with tf.Session() as sess:
    sess.run(init)
    for i in range(num_episodes):
    def __init__(
        self, sequence_length, num_classes, embedding_model: word2vec.WordVectors, filter_sizes, num_filters, l2_reg_lambda=0.0):
        """Build a text-classification CNN: embedding, conv + max-pool, dropout, softmax.

        Args:
            sequence_length: Number of token ids per input row.
            num_classes: Number of output classes.
            embedding_model: Word-vector model; only the shape of its
                ``vectors`` array is read, to size the embedding matrix.
            filter_sizes: Iterable of convolution filter heights.
            num_filters: Number of filters per filter size.
            l2_reg_lambda: Weight of the L2 penalty on the output layer.

        Creates the placeholders ``input_x``, ``input_y`` and
        ``dropout_keep_prob`` and exposes ``scores``, ``predictions``, ``loss``
        and ``accuracy``.
        """
        # Unpack both dimensions of the vectors array; indexing a single
        # dimension first would hand the unpacking a plain int.
        vocab_size, embedding_size = embedding_model.vectors.shape

        # Placeholders for input, output and dropout
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

        # Keeping track of l2 regularization loss (optional)
        l2_loss = tf.constant(0.0)

        # Embedding layer
        # NOTE(review): the embedding matrix is randomly initialised; the
        # values of embedding_model.vectors are not loaded here.
        with tf.device('/cpu:0'), tf.name_scope("embedding"):
            self.W = tf.Variable(
                tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
                name="W")
            self.embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x)
            # Add a trailing channel axis, as conv2d expects 4-D input.
            self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1)

        # Create a convolution + maxpool layer for each filter size
        pooled_outputs = []
        for filter_size in filter_sizes:
            with tf.name_scope("conv-maxpool-%s" % filter_size):
                # Convolution Layer
                filter_shape = [filter_size, embedding_size, 1, num_filters]
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
                b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
                conv = tf.nn.conv2d(
                    self.embedded_chars_expanded,
                    W,
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="conv")
                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                # Max-pool over every position the VALID convolution produced.
                pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, sequence_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool")
                pooled_outputs.append(pooled)

        # Combine all the pooled features
        num_filters_total = num_filters * len(filter_sizes)
        self.h_pool = tf.concat(pooled_outputs, 3)
        self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

        # Add dropout
        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)

        # Final (unnormalized) scores and predictions
        with tf.name_scope("output"):
            W = tf.get_variable(
                "W",
                shape=[num_filters_total, num_classes],
                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
            l2_loss += tf.nn.l2_loss(W)
            l2_loss += tf.nn.l2_loss(b)
            self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        # Calculate mean cross-entropy loss
        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.input_y)
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

        # Accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
Example #42
0
def random_phase_in_radians(shape, dtype):
  """Return a random tensor of phases: uniform samples rescaled by pi * (2u - 1)."""
  uniform_samples = tf.random_uniform(shape, dtype=dtype)
  centered = 2 * uniform_samples - 1.0
  return np.pi * centered
Example #43
0
    def tf_augment_input_bbox(self, stacked_points, bboxes, batch_inds, config):
        """Randomly rotate, scale/flip and jitter points together with their boxes.

        Args:
            stacked_points: Point coordinates of all batch elements stacked
                along the first axis (reshaped to ``[-1, 3]`` after rotation).
            bboxes: Boxes indexed as ``[batch, box, channel]``.  The first
                three channels are used as centers; the remaining channels are
                scaled as height and radius.
            batch_inds: Batch element index of every stacked point; its last
                entry plus one is taken as the number of batch elements.
            config: Provides ``augment_rotation``, ``augment_scale_min``,
                ``augment_scale_max``, ``augment_scale_anisotropic``,
                ``augment_symmetries`` and ``augment_noise``.

        Returns:
            ``(stacked_points, bboxes, s, R)``: the augmented points and boxes,
            the per-batch scales (including symmetry signs) and the per-batch
            rotation matrices.

        Raises:
            ValueError: On an unknown ``augment_rotation`` value, or when
                ``augment_scale_anisotropic`` is set.
        """

        # Parameter
        num_batches = batch_inds[-1] + 1

        ##########
        # Rotation
        ##########

        if config.augment_rotation == 'vertical':

            # Choose a random angle for each element
            theta = tf.random_uniform((num_batches,), minval=0, maxval=2*np.pi)

            # Rotation matrices around the third axis
            c, s = tf.cos(theta), tf.sin(theta)
            cs0 = tf.zeros_like(c)
            cs1 = tf.ones_like(c)
            R = tf.stack([c, -s, cs0, s, c, cs0, cs0, cs0, cs1], axis=1)
            R = tf.reshape(R, (-1, 3, 3))

            # Create N x 3 x 3 rotation matrices to multiply with stacked_points
            stacked_rots = tf.gather(R, batch_inds)

            # Apply rotations
            stacked_points = tf.reshape(tf.matmul(tf.expand_dims(stacked_points, axis=1), stacked_rots), [-1, 3])

            # Apply rotations to bbox centers: one copy of each batch rotation
            # per box.
            new_centers = tf.expand_dims(bboxes[:, :, :3], axis=2)
            tmp_R = tf.tile(tf.expand_dims(R, axis=1), tf.shape(new_centers[:1, :, :1, :1]))
            new_centers = tf.matmul(new_centers, tmp_R)
            # Re-attach the non-center channels (3:) to the rotated centers,
            # and squeeze only the axis added above so that a batch or box
            # axis of size 1 survives.
            bboxes = tf.concat((tf.squeeze(new_centers, axis=2), bboxes[:, :, 3:]), axis=2)

        elif config.augment_rotation == 'none':
            R = tf.eye(3, batch_shape=(num_batches,))

        else:
            raise ValueError('Unknown rotation augmentation : ' + config.augment_rotation)

        #######
        # Scale
        #######

        # Choose random scales for each example
        min_s = config.augment_scale_min
        max_s = config.augment_scale_max

        if config.augment_scale_anisotropic:
            raise ValueError("Applying anisotropic scale augmentation to cylinders is not advised.")
        s = tf.random_uniform((num_batches, 1), minval=min_s, maxval=max_s)

        # Apply scale to height and radius before symmetries, so that their
        # sign is never flipped.
        new_hr = bboxes[:, :, 3:] * tf.expand_dims(s, axis=2)

        # NOTE(review): this is a plain truthiness test; a per-axis list of
        # flags would always enable flips on all three axes -- confirm the
        # expected type of config.augment_symmetries.
        if config.augment_symmetries:
            symmetries = tf.round(tf.random_uniform((num_batches, 3))) * 2 - 1
            s = s * symmetries

        # Create N x 3 vector of scales to multiply with stacked_points
        stacked_scales = tf.gather(s, batch_inds)

        # Apply scales
        stacked_points = stacked_points * stacked_scales

        # Apply scale to bboxes
        new_centers = bboxes[:, :, :3] * tf.expand_dims(s, axis=1)
        bboxes = tf.concat((new_centers, new_hr), axis=2)

        #######
        # Noise
        #######

        noise = tf.random_normal(tf.shape(stacked_points), stddev=config.augment_noise)
        stacked_points = stacked_points + noise

        return stacked_points, bboxes, s, R
Example #44
0
import tensorflow as tf
import numpy as np

# Generate phony data with NumPy: 100 points in total.
x_data = np.float32(np.random.rand(2, 100)) # random input
y_data = np.dot([0.100, 0.200], x_data) + 0.300

# Build a linear model
# y = W * x_data + b
b = tf.Variable(tf.zeros([1]))
W = tf.Variable(tf.random_uniform([1, 2], -1.0, 1.0))
y = tf.matmul(W, x_data) + b

# Minimize the mean squared error
loss = tf.reduce_mean(tf.square(y - y_data))
optimizer = tf.train.GradientDescentOptimizer(0.5)
train = optimizer.minimize(loss)

# Initialize the variables
init = tf.initialize_all_variables()

# Launch the graph
sess = tf.Session()
sess.run(init)

# Fit the plane; print the current W and b every 20 steps
for step in range(0, 201):
    sess.run(train)
    if step % 20 == 0:
        print(step, sess.run(W), sess.run(b))
Example #45
0
def sample_gumbel(shape, eps=1e-20):
    """Draw Gumbel(0, 1) noise of the given shape.

    ``eps`` is added inside both logarithms before they are taken.
    """
    uniform_noise = tf.random_uniform(shape, minval=0, maxval=1)
    inner_log = tf.log(uniform_noise + eps)
    return -tf.log(-inner_log + eps)
Example #46
0
import tensorflow as tf

# Build a tiny graph: a constant plus a randomly initialised variable.
input1 = tf.constant([1.0, 2.0, 3.0], name='input1')
input2 = tf.Variable(tf.random_uniform([3]), name='input2')
output = tf.add_n([input1, input2], name='add')

# Create a file writer and write the current TensorFlow computation graph to the log directory
writer = tf.summary.FileWriter('/home/dengkaiting/pycharm_project/DeepLearning/tensorflow_book/logs', tf.get_default_graph())
writer.close()


Example #47
0
def sample(logits):
    """Return, along axis 1, the argmax of ``logits`` perturbed by -log(-log(u)) noise."""
    uniform_noise = tf.random_uniform(tf.shape(logits))
    double_log = tf.log(-tf.log(uniform_noise))
    perturbed = logits - double_log
    return tf.argmax(perturbed, 1)
Example #48
0
    def tf_augment_input(self, stacked_points, batch_inds, config):
        """Randomly rotate, scale/flip and jitter a stack of points.

        One rotation and one scale are drawn per batch element and gathered
        per point through ``batch_inds``; noise with standard deviation
        ``config.augment_noise`` is then added to every point.

        Returns:
            ``(stacked_points, s, R)``: the augmented points, the per-batch
            scales (including symmetry signs) and the per-batch rotations.

        Raises:
            ValueError: If ``config.augment_rotation`` is neither
                ``'vertical'`` nor ``'none'``.
        """
        # The last batch index plus one is the number of batch elements.
        num_batches = batch_inds[-1] + 1

        # ---- Rotation ----
        rotation_mode = config.augment_rotation
        if rotation_mode not in ('vertical', 'none'):
            raise ValueError('Unknown rotation augmentation : ' + config.augment_rotation)

        if rotation_mode == 'none':
            R = tf.eye(3, batch_shape=(num_batches,))
        else:
            # One random angle per batch element.
            angles = tf.random_uniform((num_batches,), minval=0, maxval=2*np.pi)
            cos_t = tf.cos(angles)
            sin_t = tf.sin(angles)
            zeros = tf.zeros_like(cos_t)
            ones = tf.ones_like(cos_t)

            # Row-major entries of a rotation around the third axis.
            flat_rotations = tf.stack(
                [cos_t, -sin_t, zeros, sin_t, cos_t, zeros, zeros, zeros, ones],
                axis=1)
            R = tf.reshape(flat_rotations, (-1, 3, 3))

            # Give every point the rotation of its batch element and apply it.
            per_point_rotations = tf.gather(R, batch_inds)
            rotated = tf.matmul(tf.expand_dims(stacked_points, axis=1),
                                per_point_rotations)
            stacked_points = tf.reshape(rotated, [-1, 3])

        # ---- Scale ----
        lower = config.augment_scale_min
        upper = config.augment_scale_max
        # Three independent factors when anisotropic, a single shared one otherwise.
        scale_columns = 3 if config.augment_scale_anisotropic else 1
        s = tf.random_uniform((num_batches, scale_columns), minval=lower, maxval=upper)

        # Per-axis sign: a random +1/-1 where the symmetry is enabled, +1 elsewhere.
        axis_signs = [
            tf.round(tf.random_uniform((num_batches, 1))) * 2 - 1
            if config.augment_symmetries[axis]
            else tf.ones([num_batches, 1], dtype=tf.float32)
            for axis in range(3)
        ]
        s = s * tf.concat(axis_signs, 1)

        # Give every point the scale of its batch element and apply it.
        per_point_scales = tf.gather(s, batch_inds)
        stacked_points = stacked_points * per_point_scales

        # ---- Noise ----
        jitter = tf.random_normal(tf.shape(stacked_points), stddev=config.augment_noise)
        stacked_points = stacked_points + jitter

        return stacked_points, s, R
def train_crbm2crbm(log_name, conv_size, input_size, chanl_input, chanl_output,
                    parameters):
    images_input, labels_input = inputs(train='train',
                                        batch_size=FLAGS.batch_size,
                                        num_epochs=FLAGS.num_epochs)
    #images=tf.reshape(images,[-1,input_size,input_size,chanl_input])
    W_conv1 = tf.placeholder("float",
                             [conv_size, conv_size, chanl_input, chanl_output])
    a_conv1 = tf.placeholder("float", [chanl_input])
    b_conv1 = tf.placeholder("float", [chanl_output])

    W_inc1 = tf.placeholder("float",
                            [conv_size, conv_size, chanl_input, chanl_output])
    a_inc1 = tf.placeholder("float", [chanl_input])
    b_inc1 = tf.placeholder("float", [chanl_output])

    W_extra1 = tf.placeholder("float", [11, 11, 1, 96])
    #a_extra1=tf.placeholder("float",[1])
    b_extra1 = tf.placeholder("float", [96])

    images_placeholder = tf.placeholder(tf.float32,
                                        shape=(gd.BATCH_SIZE, 227 * 227))
    images_extra = tf.reshape(images_placeholder, [-1, 227, 227, 1])

    h_conv1 = 1. / (
        1 + tf.exp(-conv2d(images_extra, W_extra1, 4, 'VALID') - b_extra1))
    norm1 = tf.nn.lrn(h_conv1,
                      5,
                      bias=1.0,
                      alpha=0.0001,
                      beta=0.75,
                      name='norm1')
    h_pool1 = max_pool(norm1, 3, 2, 'VALID')

    images = h_pool1
    print(images)

    pos_conv1_prob = 1. / (
        1 + tf.exp(-conv2d(h_pool1, W_conv1, 1, 'VALID') - b_conv1))
    pos_conv1_trans = tf.expand_dims(tf.reduce_mean(pos_conv1_prob, 0), 2)
    images_mean = tf.reduce_mean(images, 0)
    images_trans = tf.expand_dims(
        tf.reshape(
            tf.transpose(
                tf.reshape(tf.reduce_mean(images, 0), [-1, chanl_input])),
            [chanl_input, input_size, input_size]), 3)

    pos_prods_origin = conv2d(images_trans, pos_conv1_trans, 1, 'VALID')

    pos_prods_trans = tf.transpose(pos_prods_origin, [1, 2, 0, 3])
    print('pos_prods_trans:' + str(pos_prods_trans))

    pos_hid_act = tf.reduce_mean(pos_conv1_prob, 0)

    pos_vis_act = tf.reduce_mean(images, 0)
    #########################################################################3
    pos_hid_states = tf.to_float(
        tf.less_equal(tf.random_uniform(shape=tf.shape(pos_conv1_prob)),
                      pos_conv1_prob))

    #if pad_choose=="VALID":
    pos_hid_states_addpad = tf.pad(pos_hid_states,
                                   [[0, 0], [conv_size - 1, conv_size - 1],
                                    [conv_size - 1, conv_size - 1], [0, 0]],
                                   "CONSTANT")
    #else:

    W_transpose = tf.matrix_transpose(
        tf.reverse(W_conv1, [True, True, False, False]))

    print('pos_conv1_prob:' + str(pos_conv1_prob))
    neg_data = 1. / (1 + tf.exp(-tf.nn.conv2d_transpose(
        pos_conv1_prob,
        W_conv1, [gd.BATCH_SIZE, input_size, input_size, chanl_input],
        strides=[1, 1, 1, 1],
        padding='VALID') - a_conv1))

    #neg_data=1./(1+tf.exp(-conv2d_s1_valid(pos_hid_states_addpad,W_transpose)-a_conv1))
    #neg_data=
    print('neg_data' + str(neg_data))
    neg_hid_probs = 1. / (
        1 + tf.exp(-conv2d(neg_data, W_conv1, 1, 'VALID') - b_conv1))

    neg_data_trans = tf.expand_dims(
        tf.reshape(
            tf.transpose(
                tf.reshape(tf.reduce_mean(neg_data, 0), [-1, chanl_input])),
            [chanl_input, input_size, input_size]), 3)

    neg_hid_probs_trans = tf.expand_dims(tf.reduce_mean(neg_hid_probs, 0), 2)

    neg_prods_origin = conv2d(neg_data_trans, neg_hid_probs_trans, 1, 'VALID')

    neg_prods_trans = tf.transpose(neg_prods_origin, [1, 2, 0, 3])
    print('neg_prods_trans' + str(neg_prods_trans))
    neg_hid_act = tf.reduce_mean(neg_hid_probs, 0)
    neg_vis_act = tf.reduce_mean(neg_data, 0)

    err_sum = tf.reduce_sum(tf.square(images - neg_data))

    #reshaped_W=tf.transpose(tf.reshape(tf.transpose(tf.reshape(tf.squeeze(W_conv1),[-1,chanl_output])),[chanl_output*chanl_input,conv_size*conv_size]))

    W_inc_update = gd.momentum * W_inc1 + gd.epsilonw * (
        (pos_prods_trans - neg_prods_trans) / gd.BATCH_SIZE -
        weightcost * W_conv1)
    a_inc_update = gd.momentum * a_inc1 + (
        gd.epsilona / gd.BATCH_SIZE) * tf.reduce_mean(pos_vis_act -
                                                      neg_vis_act)
    b_inc_update = gd.momentum * b_inc1 + (
        gd.epsilonb / gd.BATCH_SIZE) * tf.reduce_mean(
            tf.reduce_mean((pos_hid_act - neg_hid_act), 0), 0)

    init_op = tf.initialize_all_variables()
    tf.scalar_summary('loss', err_sum)
    tf.scalar_summary('a', a_conv1[0])
    tf.scalar_summary('b', b_conv1[0])
    tf.scalar_summary('W', W_conv1[0][0][0][0])

    summary_op = tf.merge_all_summaries()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:

        sess.run(init_op)
        # summary_writer=tf.train.SummaryWriter(FLAGS.train_dir,sess.graph)
        # coord=tf.train.Coordinator()
        # threads=tf.train.start_queue_runners(sess=sess,coord=coord)
        summary_writer = tf.train.SummaryWriter(FLAGS.tensorevents_dir,
                                                sess.graph)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        W_update_0 = np.random.normal(
            0, 0.1, [conv_size, conv_size, chanl_input, chanl_output])
        a_update_0 = np.zeros([chanl_input], np.float32)
        b_update_0 = np.zeros([chanl_output], np.float32)
        W_inc_update_0 = np.zeros(
            [conv_size, conv_size, chanl_input, chanl_output], np.float32)
        a_inc_update_0 = np.zeros([chanl_input], np.float32)
        b_inc_update_0 = np.zeros([chanl_output], np.float32)

        W_extra1_0 = parameters[0]
        a_extra1_0 = parameters[1]
        b_extra1_0 = parameters[2].reshape(chanl_input)

        try:
            step = 0

            while step < 10000:
                start_time = time.time()

                #print(images_input.eval(session=sess).shape)
                #print(a_update_0)

                # logfile=open(log_name,'a')
                # logfile.write("epoch: "+str(step)+'\n')
                # logfile.write("W:\n"+str(W_update_0[0])+'\n')

                images_wtf = images_input.eval(session=sess)

                # concat_img=Image.fromarray(
                #         tile_raster_images(
                #             X=images_wtf,
                #             img_shape=(32, 32),
                #             tile_shape=(10, 10)
                #         ))
                # concat_img.save(FLAGS.pic_dir+str(step)+'_train'+'.jpg')

                # logfile.close()

                W_inc_update_0, a_update_0, b_inc_update_0, loss, neg_data_out, images_out = sess.run(
                    [
                        W_inc_update, a_inc_update, b_inc_update, err_sum,
                        neg_data, images
                    ],
                    feed_dict={
                        images_placeholder: images_wtf,
                        W_conv1: W_update_0,
                        a_conv1: a_update_0,
                        b_conv1: b_update_0,
                        W_inc1: W_inc_update_0,
                        a_inc1: a_inc_update_0,
                        b_inc1: b_inc_update_0,
                        W_extra1: W_extra1_0,
                        b_extra1: b_extra1_0
                    })

                W_update_0 = W_update_0 + W_inc_update_0
                a_update_0 = a_update_0 + a_inc_update_0
                b_update_0 = b_update_0 + b_inc_update_0

                # logfile=open(log_name,'a')
                # logfile.write("epoch: "+str(step)+'\n')
                # logfile.write("W_inc:\n"+str(W_inc_update_0[0])+'\n')

                # logfile.close()

                #print('step '+str(step)+": loss="+str(loss)+'\n')
                print("step %d: loss = %d" % (step, loss))

                if step % 10 == 0:
                    logfile = open(log_name, 'a')
                    logfile.write('step ' + str(step) + ": loss=" + str(loss) +
                                  '\n')
                    logfile.write("W:\n" + str(W_update_0[0]) + '\n')
                    #logfile.write("W_inc:\n"+str(W_inc_update_0[0])+'\n')
                    logfile.close()

                    # print(to_image(neg_data_out).shape)
                    # print("images_out"+str(to_image(images_out).shape))

                    # weight_img=Image.fromarray(
                    #        tile_raster_images(
                    #            X=reshaped_W_out.T,
                    #            img_shape=(conv_size, conv_size),
                    #            tile_shape=(chanl_input, chanl_output),
                    #        ))
                    # weight_img.save(FLAGS.Weight_dir+'weight_'+str(step)+'.jpg')

                    summary_str = sess.run(summary_op,
                                           feed_dict={
                                               images_placeholder: images_wtf,
                                               W_conv1: W_update_0,
                                               a_conv1: a_update_0,
                                               b_conv1: b_update_0,
                                               W_inc1: W_inc_update_0,
                                               a_inc1: a_inc_update_0,
                                               b_inc1: b_inc_update_0,
                                               W_extra1: W_extra1_0,
                                               b_extra1: b_extra1_0
                                           })
                    summary_writer.add_summary(summary_str, step)

                    if step % 50 == 0:
                        save_fn = FLAGS.log_dir + '/parameters_layer2_epoch_' + str(
                            step) + '.mat'
                        sio.savemat(
                            save_fn, {
                                'W1': W_extra1_0,
                                'b1': b_extra1_0,
                                'W2': W_update_0,
                                'b2': b_update_0
                            })
                        saveimg = Image.fromarray(
                            255 * to_image(images_out)[:, :, 0])
                        #print(to_image(images_input.eval(session=sess).reshape(gd.BATCH_SIZE,input_size,input_size,in)).shape)
                        saveimg = saveimg.convert('RGB')
                        saveimg.save(FLAGS.pic_dir + 'imag_layer1_epoch' +
                                     str(step) + '.jpg')

                        saveimg_negv = Image.fromarray(
                            255 * to_image(neg_data_out)[:, :, 0])
                        #print(to_image(neg_data_out).shape)
                        saveimg_negv = saveimg_negv.convert('RGB')
                        saveimg_negv.save(FLAGS.pic_dir + 'negv_layer1_epoch' +
                                          str(step) + '.jpg')

                step += 1
                # if step==100:
                # 	return W_update_0,a_update_0,b_update_0
        except tf.errors.OutOfRangeError:
            print('Done training for %d epochs, %d steps.' % (1001, step))
        finally:
            coord.request_stop()

        coord.join(threads)
        sess.close()
        return W_update_0, a_update_0, b_update_0
  def _get_exchanged_states(self, old_states, exchange_proposed,
                            exchange_proposed_n, sampled_replica_states,
                            sampled_replica_results):
    """Build per-state-part TensorArrays holding the post-exchange states.

    For each of the first `exchange_proposed_n` proposed pairs `(m, n)`, an
    accept/reject decision is computed and the states of replicas `m` and `n`
    are swapped wherever the exchange is accepted. The (possibly swapped)
    states are written at indices `m` and `n` of the output TensorArrays.

    Args:
      old_states: Sequence indexed by state part `k`; each entry must support
        `.read(index)` (presumably a `TensorArray` over replicas -- confirm
        against the caller).
      exchange_proposed: Indexable by the loop counter; `exchange_proposed[i]`
        is unstacked into the pair of replica indices `(m, n)`.
      exchange_proposed_n: Number of proposed exchanges; upper bound of the
        while loop.
      sampled_replica_states: Per-replica sequence of state parts. Only
        replica 0 is inspected here, for the number of parts and their static
        shapes.
      sampled_replica_results: Per-replica results from which the
        `'target_log_prob'` field is read via `_get_field`.

    Returns:
      A list with one `TensorArray` of size `self.num_replica` per state part.
      Only indices that appear in a proposed exchange are written by this
      method (the original summary refers to the remaining entries as
      "zeros").
    """
    with tf.name_scope('get_exchanged_states'):

      # Collect one log-prob tensor per replica and stack them along a new
      # leading replica axis.
      # NOTE(review): dividing by the inverse temperature presumably undoes
      # tempering applied to the stored `target_log_prob` -- confirm.
      target_log_probs = []
      for replica in range(self.num_replica):
        replica_log_prob = _get_field(sampled_replica_results[replica],
                                      'target_log_prob')
        inverse_temp = self.inverse_temperatures[replica]
        target_log_probs.append(replica_log_prob / inverse_temp)
      target_log_probs = tf.stack(target_log_probs, axis=0)

      dtype = target_log_probs.dtype
      num_state_parts = len(sampled_replica_states[0])
      # exchanged_states[k][i] is Tensor of (new) state part k, for replica i.
      # The `k` will be known statically, and `i` is a Tensor.
      # We will insert values into indices `i` for every replica with a proposed
      # exchange.
      exchanged_states = [
          tf.TensorArray(
              dtype,
              size=self.num_replica,
              dynamic_size=False,
              tensor_array_name='exchanged_states',
              # State part k has same shape, regardless of replica.  So use 0.
              element_shape=sampled_replica_states[0][k].shape)
          for k in range(num_state_parts)
      ]

      # Draw random variables here, to avoid sampling in the loop (and losing
      # reproducibility).  This may mean we sample too many, but we will always
      # have enough.
      # One row of uniforms per possible exchange: `num_replica // 2` rows,
      # each shaped like a single replica's log-prob (replica axis dropped).
      sample_shape = tf.concat(
          ([self.num_replica // 2], tf.shape(target_log_probs)[1:]), axis=0)
      log_uniforms = tf.log(
          tf.random_uniform(
              shape=sample_shape, dtype=dtype, seed=self._seed_stream()))

      def _swap(is_exchange_accepted, x, y):
        """Swap batches of x, y where accepted."""
        with tf.name_scope('swap_where_exchange_accepted'):
          # Where accepted, x receives y's value and y receives x's value;
          # elsewhere both keep their own.
          new_x = mcmc_util.choose(is_exchange_accepted, y, x)
          new_y = mcmc_util.choose(is_exchange_accepted, x, y)
        return new_x, new_y

      def cond(i, unused_exchanged_states):
        """Continue while there are proposed exchanges left to process."""
        return i < exchange_proposed_n

      def body(i, exchanged_states):
        """Body of while loop for exchanging states."""
        # Propose exchange between replicas indexed by m and n.
        m, n = tf.unstack(exchange_proposed[i])

        # Construct log_accept_ratio:  -temp_diff * target_log_prob_diff.
        # Note target_log_prob_diff = -EnergyDiff (common definition is in terms
        # of energy).
        temp_diff = self.inverse_temperatures[m] - self.inverse_temperatures[n]
        # Difference of target log probs may be +- Inf or NaN.  We want the
        # product of this with the temperature difference to have "alt value" of
        # -Inf.
        log_accept_ratio = mcmc_util.safe_sum(
            [-temp_diff * target_log_probs[m], temp_diff * target_log_probs[n]])

        # Accept where the pre-drawn log-uniform falls below the log ratio.
        is_exchange_accepted = log_uniforms[i] < log_accept_ratio

        # Debugging hook: routes the decision through tf.Print.
        # NOTE(review): `first_n=0` -- confirm whether this is intended to
        # emit any output at all, or whether the op should be removed.
        is_exchange_accepted = tf.Print(
            is_exchange_accepted, [
                'is_exchange_accepted: ',
                is_exchange_accepted,
                'temp_diff: ',
                temp_diff,
                'log_accept_ratio: ',
                log_accept_ratio,
            ],
            summarize=2,
            first_n=0)

        # Apply the same accept/reject mask to every state part, writing the
        # results at the two replica indices involved in this proposal.
        for k in range(num_state_parts):
          new_m, new_n = _swap(is_exchange_accepted, old_states[k].read(m),
                               old_states[k].read(n))
          exchanged_states[k] = exchanged_states[k].write(m, new_m)
          exchanged_states[k] = exchanged_states[k].write(n, new_n)

        return i + 1, exchanged_states

      # At this point, exchanged_states[k] is a length num_replicas TensorArray.
      return tf.while_loop(cond, body,
                           [tf.constant(0), exchanged_states])[1]  # Remove `i`
Example #51
0
def random_uniform(*args, **kwargs):
    """Return a uniform random tensor after pinning the global TF seed.

    The graph-level seed is reset to 12345 on every call. When the installed
    TensorFlow exposes ``tf.random.set_seed`` the ``tf.random`` API is used;
    otherwise the older top-level ``tf.set_random_seed`` /
    ``tf.random_uniform`` functions are used. All positional and keyword
    arguments are forwarded unchanged to the underlying sampler.
    """
    random_module = getattr(tf, 'random', None)
    if random_module is not None and hasattr(random_module, 'set_seed'):
        # Newer API surface.
        random_module.set_seed(12345)
        return random_module.uniform(*args, **kwargs)

    # Older API surface.
    tf.set_random_seed(12345)
    return tf.random_uniform(*args, **kwargs)
        output = LeakyReLU(output)
        output = tf.layers.dropout(output, rate=.2)

        output = lib.ops.linear.Linear('Discriminator.Output', 512, 1, output)

        return tf.reshape(output, [-1])


'''
losses
'''
# Integer-valued input batch, one flattened sample per row.
real_x_int = tf.placeholder(tf.int32, shape=[BATCH_SIZE, OUTPUT_DIM])
# Rescale as 2 * (v / 256 - 0.5); for 8-bit values (assumed -- confirm against
# the data feed) this maps [0, 255] into [-1, 1).
real_x = tf.reshape(2 * ((tf.cast(real_x_int, tf.float32) / 256.) - .5),
                    [BATCH_SIZE, OUTPUT_DIM])
# Add uniform noise in [0, 1/128), i.e. one quantization step (2 / 256) of the
# rescaled values.
real_x += tf.random_uniform(shape=[BATCH_SIZE, OUTPUT_DIM],
                            minval=0.,
                            maxval=1. / 128)  # dequantize
# Inference path: data -> latent code -> component logits / assignment.
q_z = Extractor(real_x)
q_k_logits, q_k = HyperExtractor(q_z)
q_k_probs = tf.nn.softmax(q_k_logits)
# Reconstruction of the real batch from its inferred latent code.
rec_x = Generator(q_z)
# Generative path: sample a one-hot component from `prior_k` and Gaussian
# noise, map them to a latent code, then decode it into a fake sample.
hyper_p_z = tf.random_normal([BATCH_SIZE, DIM_LATENT])
hyper_p_k = tf.one_hot(indices=prior_k.sample(BATCH_SIZE), depth=N_COMS)
p_z = HyperGenerator(hyper_p_k, hyper_p_z)
fake_x = Generator(p_z)

# For the 'local_*' modes, collect discriminator outputs for the generative
# ("fake") and inference ("real") pairs, one list entry per discriminator.
if MODE in ['local_ep', 'local_epce']:
    disc_fake, disc_real = [], []
    disc_fake.append(HyperDiscriminator(p_z, hyper_p_k))
    disc_real.append(HyperDiscriminator(q_z, q_k))
    disc_fake.append(Discriminator(fake_x, p_z))
# Integer token ids fed to the encoder; both dimensions are left unspecified.
encoder_inputs = tf.placeholder(shape=(None, None),
                                dtype=tf.int32,
                                name='encoder_inputs')
# Length of each sequence in the batch. Sequences are padded to a common
# length; to avoid padding, see dynamic memory networks, which accept
# variable-length input sequences.
encoder_inputs_length = tf.placeholder(shape=(None, ),
                                       dtype=tf.int32,
                                       name='encoder_inputs_length')
# Integer token ids the decoder is expected to produce.
decoder_targets = tf.placeholder(shape=(None, None),
                                 dtype=tf.int32,
                                 name='decoder_targets')

# Randomly initialized embedding matrix with one row per vocabulary entry,
# drawn uniformly from [-1, 1). It converts token ids into dense vectors
# (embeddings) of size `input_embedding_size`.
embeddings = tf.Variable(tf.random_uniform([vocab_size, input_embedding_size],
                                           -1.0, 1.0),
                         dtype=tf.float32)

# Look up the embedding of every encoder input token. This matrix could get
# huge in a real-world application.
encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, encoder_inputs)

from tensorflow.python.ops.rnn_cell import LSTMCell, LSTMStateTuple

# LSTM cell used by the encoder.
encoder_cell = LSTMCell(encoder_hidden_units)

#get outputs and states
#bidirectional RNN function takes a separate cell argument for
#both the forward and backward RNN, and returns separate
#outputs and states for both the forward and backward RNN

#When using a standard RNN to make predictions we are only taking the “past” into account.
Example #54
0
import tensorflow as tf
import math

vocabulary_size = 10000
embedding_size = 128
examples = [3, 3, 3, 3, 10, 10, 10, 10]
labels = [2, 1, 3, 5, 3, 5, 6, 82]
batch_size = 8
num_samples = 8  # num_samples is the number of samples to draw

### Build the computation graph
# First define the word-vector matrix, also called the embedding matrix. These
# are the word vectors to be learned by training. vocabulary_size is the size
# of the dictionary and embedding_size is the dimension of each word vector, so
# the matrix is vocabulary_size x embedding_size. It is randomly initialized
# from a uniform distribution:
embeddings = tf.Variable(
    tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))

# Define the weight matrix and the bias vector. The weights are initialized
# from a truncated normal with stddev 1 / sqrt(embedding_size); the biases are
# initialized to 0:
weights = tf.Variable(
    tf.truncated_normal([vocabulary_size, embedding_size],
                        stddev=1.0 / math.sqrt(embedding_size)))
biases = tf.Variable(tf.zeros([vocabulary_size]))

# Given one batch of input, look up the corresponding vectors in the embedding
# matrix, and the parameters of the correct outputs in the weight matrix and
# bias vector. `examples` holds the input words and `labels` the corresponding
# correct outputs; both are 1-D, each element being a word's index in the
# dictionary:
# Embeddings for examples: [batch_size, embedding_size]
example_emb = tf.nn.embedding_lookup(embeddings, examples)
# Weights for labels: [batch_size, embedding_size]
true_w = tf.nn.embedding_lookup(weights, labels)
# Biases for labels: one scalar per label, i.e. shape [batch_size]
true_b = tf.nn.embedding_lookup(biases, labels)
Example #55
0
def build_model(sess, graph, loss_model):
    """Build the word2vec training graph and wrap its handles in a Word2Vec.

    Args:
        sess: Session used to run the variable initializer. If it is None the
            current default session is used instead (the previous behavior).
        graph: Graph in which every op and variable is created.
        loss_model: 'cross_entropy' selects `tf_func.cross_entropy_loss`; any
            other value selects `tf_func.nce_loss` with negative samples drawn
            from `unigram_prob`.

    Returns:
        A `Word2Vec` object holding the input placeholders, loss, optimizer
        op, global step, embedding tensors, similarity op and saver. The
        summary and summary-writer slots are passed as None.
    """
    with graph.as_default():
        # Ops and variables pinned to the CPU because of missing GPU implementation
        with tf.device('/cpu:0'):
            # Input data.
            train_inputs = tf.placeholder(tf.int32, shape=[batch_size])
            train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])
            valid_dataset = tf.constant(valid_examples, dtype=tf.int32)

            global_step = tf.Variable(0, trainable=False)

            # Look up embeddings for inputs.
            embeddings = tf.Variable(
                tf.random_uniform([vocabulary_size, embedding_size], -1.0,
                                  1.0))
            embed = tf.nn.embedding_lookup(embeddings, train_inputs)

            sm_weights = tf.Variable(
                tf.truncated_normal([vocabulary_size, embedding_size],
                                    stddev=1.0 / math.sqrt(embedding_size)))

            # Get context embeddings from labels; the lookup on
            # [batch_size, 1] labels is flattened to one row per example.
            true_w = tf.nn.embedding_lookup(sm_weights, train_labels)
            true_w = tf.reshape(true_w, [-1, embedding_size])

            # Construct the variables for the NCE loss. They are created for
            # both loss models, so the set of saved variables does not depend
            # on `loss_model`.
            nce_weights = tf.Variable(
                tf.truncated_normal([vocabulary_size, embedding_size],
                                    stddev=1.0 / math.sqrt(embedding_size)))
            nce_biases = tf.Variable(tf.zeros([vocabulary_size]))

        if loss_model == 'cross_entropy':
            loss = tf.reduce_mean(tf_func.cross_entropy_loss(embed, true_w))
        else:
            # Sample negative examples with unigram probability. The draw
            # happens once, at graph-construction time, without replacement.
            sample = np.random.choice(vocabulary_size,
                                      num_sampled,
                                      p=unigram_prob,
                                      replace=False)

            loss = tf.reduce_mean(
                tf_func.nce_loss(embed, nce_weights, nce_biases, train_labels,
                                 sample, unigram_prob))

        # tf.summary.scalar('loss', loss)

        # Construct the SGD optimizer using a learning rate of 1.0.
        optimizer = tf.train.GradientDescentOptimizer(1).minimize(
            loss, global_step=global_step)

        # Compute the cosine similarity between minibatch examples and all embeddings.
        norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))
        normalized_embeddings = embeddings / norm

        valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings,
                                                  valid_dataset)
        similarity = tf.matmul(valid_embeddings,
                               normalized_embeddings,
                               transpose_b=True)

        saver = tf.train.Saver(tf.global_variables())

        # Summaries are currently disabled; the slots are kept so the
        # Word2Vec constructor signature stays the same.
        # summary = tf.summary.merge_all()
        # summary_writer = tf.summary.FileWriter(summary_path + '/summary', sess.graph)
        summary = None
        summary_writer = None

        # Initialize through the session handed in by the caller instead of
        # silently depending on whichever session happens to be the default.
        # `Operation.run` still falls back to the default session when
        # `sess` is None.
        tf.global_variables_initializer().run(session=sess)
        print("Initialized")

    model = Word2Vec(train_inputs, train_labels, loss, optimizer, global_step,
                     embeddings, normalized_embeddings, valid_embeddings,
                     similarity, saver, summary, summary_writer)

    return model
def autoencoder(input_shape=[None, 784],
                n_filters=[1, 10, 10, 10],
                filter_sizes=[3, 3, 3, 3],
                corruption=False):
    """Build a convolutional (optionally denoising) autoencoder w/ tied weights.

    Parameters
    ----------
    input_shape : list, optional
        Shape of the input placeholder. A 2-d input is reshaped into a square
        image with `n_filters[0]` channels; a 4-d input is used as is.
    n_filters : list, optional
        `n_filters[0]` is the channel count used when reshaping 2-d input;
        every following entry is the number of output channels of one encoder
        layer.
    filter_sizes : list, optional
        Square kernel size of each encoder layer, indexed by layer.
    corruption : bool, optional
        If true, the network input is passed through `corrupt` first. The
        cost is still measured against the clean input, so the network is
        trained to denoise.

    Returns
    -------
    dict
        'x' : input placeholder to the network,
        'z' : inner-most latent representation,
        'y' : output reconstruction of the input,
        'cost' : sum of squared differences between 'y' and the clean input.

    Raises
    ------
    ValueError
        If the input is 2-d and its second dimension is not a perfect square,
        or if the input is neither 2-d nor 4-d.
    """

    def _as_image_batch(tensor):
        """Return `tensor` as a 4-d image batch, reshaping 2-d input."""
        rank = len(tensor.get_shape())
        if rank == 2:
            x_dim = np.sqrt(tensor.get_shape().as_list()[1])
            if x_dim != int(x_dim):
                raise ValueError('Unsupported input dimensions')
            x_dim = int(x_dim)
            return tf.reshape(tensor, [-1, x_dim, x_dim, n_filters[0]])
        if rank == 4:
            return tensor
        raise ValueError('Unsupported input dimensions')

    # %%
    # input to the network
    x = tf.placeholder(tf.float32, input_shape, name='x')

    # %%
    # Optionally apply denoising autoencoder
    if corruption:
        x_noise = corrupt(x)
    else:
        x_noise = x

    # %%
    # ensure 2-d is converted to square tensor.
    current_input = _as_image_batch(x_noise)
    # The reconstruction target is always the clean input. Previously the
    # corrupted tensor was also used as the target, so with corruption enabled
    # the network was trained to reproduce the noise instead of removing it.
    # Without corruption the same tensor is reused, so that graph is unchanged.
    x_tensor = _as_image_batch(x) if corruption else current_input

    # %%
    # Build the encoder
    encoder = []
    shapes = []
    for layer_i, n_output in enumerate(n_filters[1:]):
        n_input = current_input.get_shape().as_list()[3]
        # Remember each layer's input shape; the decoder restores it.
        shapes.append(current_input.get_shape().as_list())
        W = tf.Variable(
            tf.random_uniform([
                filter_sizes[layer_i], filter_sizes[layer_i], n_input, n_output
            ], -1.0 / math.sqrt(n_input), 1.0 / math.sqrt(n_input)))
        b = tf.Variable(tf.zeros([n_output]))
        encoder.append(W)
        output = lrelu(
            tf.add(
                tf.nn.conv2d(current_input,
                             W,
                             strides=[1, 2, 2, 1],
                             padding='SAME'), b))
        current_input = output

    # %%
    # store the latent representation
    z = current_input
    # The decoder walks the encoder layers from the inside out.
    encoder.reverse()
    shapes.reverse()

    # %%
    # Build the decoder using the same weights
    # NOTE(review): `tf.nn.deconv2d` and `tf.pack` look like pre-1.0
    # TensorFlow names (later `conv2d_transpose` / `stack`) -- confirm the
    # TensorFlow version this file targets before renaming them.
    for layer_i, shape in enumerate(shapes):
        W = encoder[layer_i]
        # One bias per input channel of the tied encoder kernel.
        b = tf.Variable(tf.zeros([W.get_shape().as_list()[2]]))
        output = lrelu(
            tf.add(
                tf.nn.deconv2d(
                    current_input,
                    W,
                    # Batch size is only known at run time, hence tf.shape.
                    tf.pack([tf.shape(x)[0], shape[1], shape[2], shape[3]]),
                    strides=[1, 2, 2, 1],
                    padding='SAME'), b))
        current_input = output

    # %%
    # now have the reconstruction through the network
    y = current_input
    # cost function measures pixel-wise difference
    cost = tf.reduce_sum(tf.square(y - x_tensor))

    # %%
    return {'x': x, 'z': z, 'y': y, 'cost': cost}
Example #57
0
 def _sample(logits: tf.Tensor):
     """Pick one index along the last axis of `logits` using Gumbel noise.

     Uniform noise shaped like `logits` is turned into `log(-log(u))`, which
     is subtracted from the logits before taking the arg-max. The resulting
     op is named "action".
     """
     uniform_noise = tf.random_uniform(tf.shape(logits))
     perturbed_logits = logits - tf.log(-tf.log(uniform_noise))
     return tf.argmax(perturbed_logits, axis=-1, name="action")
Example #58
0
def xavier_init(fan_in, fan_out, constant=1):
    """Return a (fan_in, fan_out) float32 tensor with Xavier-uniform values.

    Values are drawn uniformly from [-limit, limit) where
    limit = constant * sqrt(6 / (fan_in + fan_out)).
    """
    limit = constant * np.sqrt(6.0 / (fan_in + fan_out))
    return tf.random_uniform((fan_in, fan_out),
                             minval=-limit,
                             maxval=limit,
                             dtype=tf.float32)
Example #59
0
    def __init__(
      self, sequence_length, num_classes, vocab_size, embedding_size, num_hidden, batch_size, init_state, cell_type):
        """Build the graph of an RNN text classifier.

        The graph consists of an embedding lookup, a single recurrent cell
        unrolled with `tf.nn.dynamic_rnn`, a linear output layer applied to
        the last time step, a softmax cross-entropy loss and an accuracy op.

        Args:
            sequence_length: Second dimension of the `input_x` placeholder.
            num_classes: Number of output classes (width of `input_y`).
            vocab_size: Number of rows of the embedding matrix.
            embedding_size: Number of columns of the embedding matrix.
            num_hidden: Number of units of the recurrent cell.
            batch_size: Batch size used to build the zero initial state; only
                read when `init_state is True`.
            init_state: When exactly `True`, an explicit zero initial state
                is created and passed to the RNN.
            cell_type: 'lstm' selects `LSTMCell`, 'gru' selects `GRUCell`;
                every other value (including 'vanlia') selects
                `BasicLSTMCell`.
        """
        # Graph inputs: token ids, labels and the dropout keep-probability.
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")
        self.input_y = tf.placeholder(tf.int32, [None, num_classes], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

        # Embedding lookup, pinned to the CPU.
        with tf.device('/cpu:0'), tf.name_scope("embedding"):
            embedding_init = tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0)
            W = tf.Variable(embedding_init, name="W")
            # Presumably [batch, n_timesteps, embedding_size] -- confirm.
            self.embedded_words = tf.nn.embedding_lookup(W, self.input_x)

        # Choose the recurrent cell.
        with tf.device('/cpu:0'), tf.name_scope("rnn"):
            if cell_type == 'lstm':
                self.rnn_cell = tf.nn.rnn_cell.LSTMCell(num_hidden)
            elif cell_type == 'gru':
                self.rnn_cell = tf.nn.rnn_cell.GRUCell(num_hidden)
            else:
                # Covers 'vanlia' as well as any unrecognised cell type.
                self.rnn_cell = tf.contrib.rnn.BasicLSTMCell(num_hidden)

        # Unroll the cell over the embedded sequence.
        with tf.name_scope("rnn"):
            rnn_options = {'dtype': tf.float32}
            if init_state is True:
                # Start from an explicit all-zero state.
                self.initial_state = self.rnn_cell.zero_state(batch_size, dtype=tf.float32)
                rnn_options['initial_state'] = self.initial_state
            self.outputs, _final_state = tf.nn.dynamic_rnn(
                self.rnn_cell, self.embedded_words, **rnn_options)

        # Final (unnormalized) scores and predictions.
        with tf.name_scope("output"):
            W = tf.get_variable(
                "W",
                shape=[num_hidden, num_classes],
                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")

            # Swap the first two axes so that index -1 selects the last step.
            self.transpose_outputs = tf.transpose(self.outputs, perm=[1, 0, 2])
            last_step_output = self.transpose_outputs[-1]

            self.scores = tf.nn.xw_plus_b(last_step_output, W, b, name="scores")
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        # Mean cross-entropy loss.
        with tf.name_scope("loss"):
            per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
                logits=self.scores, labels=self.input_y)
            self.loss = tf.reduce_mean(per_example_loss)

        # Fraction of examples whose predicted class matches the label.
        with tf.name_scope("accuracy"):
            expected_classes = tf.argmax(self.input_y, 1)
            is_correct = tf.equal(self.predictions, expected_classes)
            self.accuracy = tf.reduce_mean(tf.cast(is_correct, "float"), name="accuracy")
Example #60
0
def resnet_v2_200(inputs,
                  num_classes=None,
                  global_pool=True,
                  reuse=None,
                  scope='resnet_v2_200'):
    """Assemble the four-block layout of this variant and build it via resnet_v2.

    Each block is a run of identical `(depth, bottleneck_depth, 1)` units; the
    first three blocks end with one extra unit whose last value is 2.
    The remaining arguments are forwarded to `resnet_v2` unchanged, with the
    root block always included.
    """
    # (scope name, depth, bottleneck depth, repeated units, add closing unit)
    layout = (
        ('block1', 256, 64, 2, True),
        ('block2', 512, 128, 23, True),
        ('block3', 1024, 256, 35, True),
        ('block4', 2048, 512, 3, False),
    )

    blocks = []
    for block_name, depth, bottleneck_depth, repeats, has_closing_unit in layout:
        units = [(depth, bottleneck_depth, 1)] * repeats
        if has_closing_unit:
            units = units + [(depth, bottleneck_depth, 2)]
        blocks.append(Block(block_name, bottleneck, units))

    return resnet_v2(inputs,
                     blocks,
                     num_classes,
                     global_pool,
                     include_root_block=True,
                     reuse=reuse,
                     scope=scope)


# Benchmark driver: push a random image batch through the network and time it.
batch_size = 32
height, width = 224, 224
# Random 3-channel input batch; no real data is loaded.
inputs = tf.random_uniform((batch_size, height, width, 3))
# Build the network with the arg scope configured for is_training=False.
# NOTE(review): this builds `resnet_v2_101`, not the `resnet_v2_200` defined
# just above -- confirm which depth is meant to be timed.
with slim.arg_scope(resnet_arg_scope(is_training=False)):
    nets, end_points = resnet_v2_101(inputs, 1000)
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
# Not passed to `time_tensorflow` below; presumably read there as a
# module-level global -- confirm.
num_batches = 100
time_tensorflow(sess, nets, 'Forword')