def main(args):
  """Fit the linear model ``h = W * x + b`` with plain gradient descent.

  Builds a TF1 graph, trains it on a small hard-coded data set and prints the
  learned ``W``, ``b`` and the final cost.

  Args:
    args: Unused; kept so the function can serve as an entry point.
  """
  # The hypothesis starts out as h = x (W = 1, b = 0).
  W = tf.Variable([1], dtype=tf.float32)
  b = tf.Variable([0], dtype=tf.float32)
  x = tf.placeholder(tf.float32)
  h = W * x + b

  init = tf.global_variables_initializer()

  # Half mean-squared-error cost.
  y = tf.placeholder(tf.float32)
  squared_deltas = tf.square(h - y)
  cost = 0.5 * tf.reduce_mean(squared_deltas)

  # Ops that set the parameters by hand to W = -1, b = 1.
  fixW = tf.assign(W, [-1.])
  fixb = tf.assign(b, [1.])

  # Linear regression.
  # NOTE(review): the sample x = 35 makes mean(x^2) large; a step size of
  # 0.01 may be too big for gradient descent to converge here - verify.
  optimizer = tf.train.GradientDescentOptimizer(0.01)
  train = optimizer.minimize(cost)

  # Single feed dict shared by training and the final evaluation.
  feed = {x: [1, 2, 3, 4, -3, 35], y: [0, -1, -2, -3, 4, -34]}

  # The context manager guarantees the session is closed, even on error.
  with tf.Session() as sess:
    sess.run([fixW, fixb])
    # Running the initializer resets W and b to their initial values, so the
    # manual assignment above does not influence training.
    sess.run(init)

    for _ in range(10000):
      sess.run(train, feed)

    curr_W, curr_b, curr_loss = sess.run([W, b, cost], feed)

  print("W, b, cost learned: ", curr_W, curr_b, curr_loss)
Beispiel #2
0
    def _build_network(self):
        """Build the Q-network graph, its training op, the saver and the session.

        Creates the state/episode counters with their increment ops, the
        Q-value model, a mean-squared-error loss, a decayed learning rate with
        a lower bound and an RMSProp training op. Afterwards it creates the
        saver and session, loads a checkpoint and merges all summaries.
        """
        self._initPlaceholders()

        self.count_states = tf.Variable(initial_value=0, trainable=False, dtype=tf.int64, name='count_states') # non-trainable counter for the number of states

        # Similarly, this is the counter for the number of episodes.
        self.count_episodes = tf.Variable(initial_value=0, trainable=False, dtype=tf.int64, name='count_episodes') # non-trainable counter for the number of episodes

        # TensorFlow operation for increasing count_states.
        self.count_states_increase = tf.assign(self.count_states, self.count_states + 1)

        # TensorFlow operation for increasing count_episodes.
        self.count_episodes_increase = tf.assign(self.count_episodes, self.count_episodes + 1)               

        self.q_values = create_conv_model(self.states, self.config, self.num_actions)
        # Both targets and predictions are multiplied element-wise by
        # self.actions before the MSE (presumably a one-hot mask of the
        # action taken - confirm against _initPlaceholders).
        error = tf.losses.mean_squared_error(self.q_values_target * self.actions,self.q_values * self.actions)
        self.loss = error

        # Staircase exponential decay, never dropping below learning_rate_minimum.
        self.learning_rate_op = tf.maximum(self.learning_rate_minimum,
          tf.train.exponential_decay(
              self.learning_rate,
              self.learning_rate_step,
              self.learning_rate_decay_step,
              self.learning_rate_decay,
              staircase=True))

        self.optim = tf.train.RMSPropOptimizer(self.learning_rate_op, momentum=0.95, epsilon=0.01).minimize(self.loss)

        # The saver is created after the optimizer, so it also sees the
        # optimizer's variables.
        self.saver = tf.train.Saver()
        self.sess = tf.Session()
        self.load_checkpoint()
        self.summary = tf.summary.merge_all()
def batch_norm(x, name_scope, training, epsilon=1e-3, decay=0.999):
    """Batch-normalize a 2-D ``[batch, values]`` tensor.

    In training mode the batch statistics are used for normalization and the
    population (moving-average) statistics are updated. Otherwise the stored
    population statistics are used and left untouched.

    Args:
        x: 2-D input tensor; the size of dimension 1 must be statically known.
        name_scope: Variable scope under which the layer's variables live.
        training: Scalar boolean tensor selecting the branch of ``tf.cond``.
        epsilon: Small constant added to the variance for numerical stability.
        decay: Decay factor of the exponential moving averages.

    Returns:
        The normalized tensor, scaled by ``scale`` and shifted by ``offset``.
    """
    with tf.variable_scope(name_scope):
        size = x.get_shape().as_list()[1]

        scale = tf.get_variable('scale', [size],
            initializer=tf.constant_initializer(0.1))
        # No explicit initializer: the variable scope's default applies.
        offset = tf.get_variable('offset', [size])

        pop_mean = tf.get_variable('pop_mean', [size],
            initializer=tf.zeros_initializer(),
            trainable=False)
        pop_var = tf.get_variable('pop_var', [size],
            initializer=tf.ones_initializer(),
            trainable=False)

        def batch_statistics():
            # The moving-average updates are created INSIDE the branch.
            # tf.cond only makes ops defined in the branch functions
            # conditional; ops built outside and merely referenced (e.g. via
            # control dependencies) run on every evaluation, which would
            # corrupt the population statistics at inference time.
            batch_mean, batch_var = tf.nn.moments(x, [0])
            train_mean_op = tf.assign(
                pop_mean,
                pop_mean * decay + batch_mean * (1 - decay))
            train_var_op = tf.assign(
                pop_var,
                pop_var * decay + batch_var * (1 - decay))
            with tf.control_dependencies([train_mean_op, train_var_op]):
                return tf.nn.batch_normalization(x, batch_mean, batch_var, offset, scale, epsilon)

        def population_statistics():
            return tf.nn.batch_normalization(x, pop_mean, pop_var, offset, scale, epsilon)

        return tf.cond(training, batch_statistics, population_statistics)
Beispiel #4
0
  def test_capture(self):
    """MetadataCaptureHook writes its output files only after the target step."""
    global_step = tf.contrib.framework.get_or_create_global_step()
    # Some test computation
    some_weights = tf.get_variable("weigths", [2, 128])
    computation = tf.nn.softmax(some_weights)

    hook = hooks.MetadataCaptureHook(
        params={"step": 5}, model_dir=self.model_dir,
        run_config=tf.contrib.learn.RunConfig())
    hook.begin()

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      #pylint: disable=W0212
      mon_sess = monitored_session._HookedSession(sess, [hook])
      # Should not trigger for step 0
      sess.run(tf.assign(global_step, 0))
      mon_sess.run(computation)
      self.assertEqual(gfile.ListDirectory(self.model_dir), [])
      # Should trigger *after* step 5
      sess.run(tf.assign(global_step, 5))
      # The first run at step 5 must not have produced any files yet ...
      mon_sess.run(computation)
      self.assertEqual(gfile.ListDirectory(self.model_dir), [])
      # ... the following run is expected to leave the three output files.
      mon_sess.run(computation)
      self.assertEqual(
          set(gfile.ListDirectory(self.model_dir)),
          set(["run_meta", "tfprof_log", "timeline.json"]))
 def fit(self, xs, ys):
     """Fit the model to ``(xs, ys)`` and log the loss before and after.

     Optionally refreshes the input-normalization variables from ``xs``, then
     runs either the trust-region optimizer (once a first optimization has
     happened) or the plain optimizer, recording ``LossBefore``, ``LossAfter``
     and ``dLoss`` through the tabular logger.

     Args:
         xs: Input samples.
         ys: Targets for ``xs``.
     """
     if self.normalize_inputs:
         # Refresh the normalizing constants from the current inputs.
         session = tf.get_default_session()
         x_mean = np.mean(xs, axis=0, keepdims=True)
         x_std = np.std(xs, axis=0, keepdims=True) + 1e-8
         refresh_op = tf.group(
             tf.assign(self.x_mean_var, x_mean),
             tf.assign(self.x_std_var, x_std),
         )
         session.run(refresh_op)

     # The trust-region path needs the previous predictions as an extra input.
     if self.use_trust_region and self.first_optimized:
         inputs = [xs, ys, self.f_prob(xs)]
         optimizer = self.tr_optimizer
     else:
         inputs = [xs, ys]
         optimizer = self.optimizer

     loss_before = optimizer.loss(inputs)
     prefix = self.name + "_" if self.name else ""
     logger.record_tabular(prefix + 'LossBefore', loss_before)

     optimizer.optimize(inputs)

     loss_after = optimizer.loss(inputs)
     logger.record_tabular(prefix + 'LossAfter', loss_after)
     logger.record_tabular(prefix + 'dLoss', loss_before - loss_after)
     self.first_optimized = True
 def testReuseVars(self):
   """With is_training=False, batch_norm must not modify its moving statistics."""
   height, width = 3, 3
   with self.test_session() as sess:
     image_shape = (10, height, width, 3)
     image_values = np.random.rand(*image_shape)
     # Per-channel statistics: reduce over batch, height and width.
     expected_mean = np.mean(image_values, axis=(0, 1, 2))
     expected_var = np.var(image_values, axis=(0, 1, 2))
     images = tf.constant(image_values, shape=image_shape, dtype=tf.float32)
     output = ops.batch_norm(images, decay=0.1, is_training=False)
     update_ops = tf.get_collection(ops.UPDATE_OPS_COLLECTION)
     # Make `output` depend on every collected update op, so that fetching it
     # would also run any moving-average update that was registered.
     with tf.control_dependencies(update_ops):
       barrier = tf.no_op(name='gradient_barrier')
       output = control_flow_ops.with_dependencies([barrier], output)
     # Initialize all variables
     sess.run(tf.global_variables_initializer())
     moving_mean = variables.get_variables('BatchNorm/moving_mean')[0]
     moving_variance = variables.get_variables('BatchNorm/moving_variance')[0]
     mean, variance = sess.run([moving_mean, moving_variance])
     # After initialization moving_mean == 0 and moving_variance == 1.
     self.assertAllClose(mean, [0] * 3)
     self.assertAllClose(variance, [1] * 3)
     # Simulate assignment from saver restore.
     init_assigns = [tf.assign(moving_mean, expected_mean),
                     tf.assign(moving_variance, expected_var)]
     sess.run(init_assigns)
     for _ in range(10):
       sess.run([output], {images: np.random.rand(*image_shape)})
     mean = moving_mean.eval()
     variance = moving_variance.eval()
     # Although we feed different images, the moving_mean and moving_variance
     # shouldn't change.
     self.assertAllClose(mean, expected_mean)
     self.assertAllClose(variance, expected_var)
def fit_em(X, initial_mus, max_steps, tol, min_covar=MIN_COVAR_DEFAULT):
    """Fit a mixture model with expectation-maximization in TensorFlow.

    Args:
        X: Data array of shape ``(N, D)``.
        initial_mus: Initial component means of shape ``(K, D)``.
        max_steps: Maximum number of EM iterations.
        tol: Stop once the mean log-likelihood changes by less than this
            between two consecutive iterations.
        min_covar: Forwarded to ``mstep``.

    Returns:
        Tuple ``(ll, mus, sigmas, alphas)``: the last evaluated mean
        log-likelihood (computed before the final parameter update) and the
        fitted parameters as arrays.
    """
    tf.reset_default_graph()

    N, D = X.shape
    K, Dmu = initial_mus.shape
    assert D == Dmu

    # Every component starts with the per-feature data variance and an
    # equal mixing weight.
    mus0 = initial_mus
    sigmas0 = np.tile(np.var(X, axis=0), (K, 1))
    alphas0 = np.ones(K) / K
    X = tf.constant(X)

    mus, sigmas, alphas = (tf.Variable(x, dtype='float64') for x in [mus0, sigmas0, alphas0])

    all_ll, resp = estep(X, mus, sigmas, alphas)
    cmus, csigmas, calphas = mstep(X, resp, min_covar=min_covar)
    update_mus_step = tf.assign(mus, cmus)
    update_sigmas_step = tf.assign(sigmas, csigmas)
    update_alphas_step = tf.assign(alphas, calphas)

    # Built once here: creating this op inside the loop would add a new node
    # to the graph on every iteration.
    mean_ll = tf.reduce_mean(all_ll)

    init_op = tf.initialize_all_variables()
    ll = prev_ll = -np.inf

    with tf.Session() as sess:
        sess.run(init_op)
        for i in range(max_steps):
            ll = sess.run(mean_ll)
            sess.run((update_mus_step, update_sigmas_step, update_alphas_step))
            if abs(ll - prev_ll) < tol:
                break
            prev_ll = ll
        m, s, a = sess.run((mus, sigmas, alphas))

    return ll, m, s, a
Beispiel #8
0
  def _apply(self, grad, var, indices=None):
    """Build the update op for one variable.

    Args:
      grad: Gradient (or gradient values) for `var`.
      var: Variable to update.
      indices: Forwarded to the `_assign_add` / `_gather` / `_assign_sub`
        helpers; defaults to None.

    Returns:
      An op grouping the variable update and the update of slot "m".
    """
    lr = tf.cast(self._learning_rate_tensor, var.dtype.base_dtype)
    m = self.get_slot(var, "m")
    v = self.get_slot(var, "v")
    beta1_t = tf.cast(self._beta1_t, var.dtype.base_dtype)
    beta2_t = tf.cast(self._beta2_t, var.dtype.base_dtype)
    epsilon_t = tf.cast(self._epsilon_t, var.dtype.base_dtype)

    # m_t = beta1 * m + (1 - beta1) * g_t
    # Done in two steps: first decay the whole slot, then add the scaled
    # gradient through _assign_add.
    m_scaled_g_values = grad * (1 - beta1_t)
    m_t = tf.assign(m, m * beta1_t, use_locking=self._use_locking)
    with tf.control_dependencies([m_t]):
      m_t = self._assign_add(m, updates=m_scaled_g_values, indices=indices)
    m_gathered = self._gather(m_t, indices=indices)

    # Also see tf.nn.moments.
    # Squared deviation of the gradient from the freshly updated m.
    variance = tf.squared_difference(grad, m_gathered)

    # v_t = beta2 * v + (1 - beta2) * variance
    v_scaled_new_values = variance * (1 - beta2_t)
    v_t = tf.assign(v, v * beta2_t, use_locking=self._use_locking)
    with tf.control_dependencies([v_t]):
      v_t = self._assign_add(v, updates=v_scaled_new_values, indices=indices)
    v_gathered = self._gather(v_t, indices=indices)

    # The step is the learning rate times the gradient, scaled by
    # v / (variance + epsilon) capped at 1.
    factor = v_gathered / (variance + epsilon_t)
    update = lr * grad * tf.minimum(factor, 1.0)
    var_update = self._assign_sub(ref=var, updates=update, indices=indices)
    # v_t is not listed explicitly; var_update consumes it through v_gathered.
    return tf.group(*[var_update, m_t])
Beispiel #9
0
 def __call__(self, x):
     """Normalize a 2-D tensor with its batch statistics.

     Args:
         x: 2-D tensor; statistics are taken over axis 0.

     Returns:
         Tuple ``(output, moment_op)``: ``output`` is
         ``gamma * normalized_x + beta``; ``moment_op`` stores the batch mean
         and variance into ``self.mean`` / ``self.var`` when
         ``self.train[0]`` is true and does nothing otherwise.

     Raises:
         ValueError: If ``x`` is not 2-D. Previously this surfaced as an
             UnboundLocalError on ``mean``.
     """
     ndims = x.get_shape().ndims
     if ndims != 2:
         raise ValueError(
             "Only 2-D inputs are supported, got ndims=%s" % ndims)

     mean, var = tf.nn.moments(x, axes=(0,), keep_dims=True)
     normalized_x = (x-mean)/tf.sqrt(var+self.epsilon)

     def _store_moments():
         # Built inside the branch so the assignments only run when the
         # predicate is true; ops created outside tf.cond and merely returned
         # from a branch run unconditionally.
         return tf.group(tf.assign(self.mean, mean), tf.assign(self.var, var))

     moment_op = tf.cond(self.train[0], _store_moments, lambda: tf.no_op())
     return self.gamma*normalized_x+self.beta, moment_op
def batch_norm(inputs, name_scope, is_training, epsilon=1e-3, decay=0.99):
    """Batch-normalize ``inputs`` along axis 0.

    In training mode the batch statistics are used and the population
    (moving-average) statistics are updated. Otherwise the stored population
    statistics are used and left untouched.

    Args:
        inputs: Input tensor; the size of dimension 1 must be statically known.
        name_scope: Variable scope under which the layer's variables live.
        is_training: Scalar boolean tensor selecting the branch of ``tf.cond``.
        epsilon: Small constant added to the variance for numerical stability.
        decay: Decay factor of the exponential moving averages.

    Returns:
        The normalized tensor, scaled by ``gamma`` and shifted by ``beta``.
    """
    with tf.variable_scope(name_scope):
        size = inputs.get_shape().as_list()[1]

        gamma = tf.get_variable(
            'gamma', [size], initializer=tf.constant_initializer(0.1))
        # No explicit initializer: the variable scope's default applies.
        beta = tf.get_variable('beta', [size])

        pop_mean = tf.get_variable('pop_mean', [size],
                                   initializer=tf.zeros_initializer(), trainable=False)
        pop_var = tf.get_variable('pop_var', [size],
                                  initializer=tf.ones_initializer(), trainable=False)

        def batch_statistics():
            # The moving-average updates are created INSIDE the branch.
            # tf.cond only makes ops defined in the branch functions
            # conditional; update ops built outside would run on every
            # evaluation and corrupt the population statistics at inference.
            batch_mean, batch_var = tf.nn.moments(inputs, [0])
            train_mean_op = tf.assign(
                pop_mean, pop_mean * decay + batch_mean * (1 - decay))
            train_var_op = tf.assign(
                pop_var, pop_var * decay + batch_var * (1 - decay))
            with tf.control_dependencies([train_mean_op, train_var_op]):
                return tf.nn.batch_normalization(inputs, batch_mean, batch_var, beta, gamma, epsilon)

        def pop_statistics():
            return tf.nn.batch_normalization(inputs, pop_mean, pop_var, beta, gamma, epsilon)

        # control flow
        return tf.cond(is_training, batch_statistics, pop_statistics)
Beispiel #11
0
 def testIsVariableInitialized(self):
   """A variable op reports uninitialized until a value is assigned to it."""
   for gpu_flag in (True, False):
     with self.test_session(use_gpu=gpu_flag):
       var = state_ops.variable_op([1, 2], tf.float32)
       # Freshly created: no value has been assigned yet.
       before = tf.is_variable_initialized(var).eval()
       self.assertEqual(False, before)
       # Evaluating the assignment gives the variable its first value.
       tf.assign(var, [[2.0, 3.0]]).eval()
       after = tf.is_variable_initialized(var).eval()
       self.assertEqual(True, after)
    def mean_var_with_update():
        """Return the batch mean/variance, tied to the moving-average updates.

        Uses `ema`, `batch_mean`, `batch_var`, `pop_mean` and `pop_var` from
        the enclosing scope.
        """
        ema_apply_op = ema.apply([batch_mean, batch_var])
        # NOTE(review): these assignments are not ordered after ema_apply_op,
        # so they may copy the averages from before this step's update -
        # confirm whether that is intended.
        pop_mean_op = tf.assign(pop_mean, ema.average(batch_mean))
        pop_var_op = tf.assign(pop_var, ema.average(batch_var))

        # The identities force the three update ops to run whenever the
        # returned tensors are evaluated.
        with tf.control_dependencies([ema_apply_op, pop_mean_op, pop_var_op]):
            return tf.identity(batch_mean), tf.identity(batch_var)
Beispiel #13
0
def batchnorm(x, gamma, beta, r_mean, r_var):
  """Normalize ``x`` with running statistics that are refreshed first.

  The running mean and variance are blended with the statistics of the
  current batch (weights 0.9 / 0.1). The normalization then uses the running
  values clipped to ``[1e-10, 100]``.

  Args:
    x: Input tensor; moments are taken over axis 0.
    gamma: Scale applied after normalization.
    beta: Offset applied after normalization.
    r_mean: Variable holding the running mean.
    r_var: Variable holding the running variance.

  Returns:
    The normalized tensor.
  """
  batch_mean, batch_var = tf.nn.moments(x, [0])
  mean_update = tf.assign(r_mean, 0.9 * r_mean + 0.1 * batch_mean)
  var_update = tf.assign(r_var, 0.9 * r_var + 0.1 * batch_var)
  # Everything below is created under a control dependency on both updates.
  with tf.control_dependencies([mean_update, var_update]):
    clipped_mean = tf.clip_by_value(r_mean, 1e-10, 100)
    clipped_var = tf.clip_by_value(r_var, 1e-10, 100)
    return tf.nn.batch_normalization(
        x, clipped_mean, clipped_var,
        offset=beta, scale=gamma, variance_epsilon=1e-5)
    def expectation_maximization_step(self, x):
        """Build one EM step for the observation sequence `x`.

        Runs forward-backward, re-estimates the transition and emission
        parameters, checks convergence and wires up the parameter updates.

        Args:
            x: Observation indices, used to gather rows of `self.E`.

        Returns:
            The result of `self.check_convergence` for the new parameters.
        """
        
        # probability of emission sequence
        obs_prob_seq = tf.gather(self.E, x)

        with tf.name_scope('Forward_Backward'):
            self.forward_backward(obs_prob_seq)

        with tf.name_scope('Re_estimate_transition'):
            new_T0, new_transition = self.re_estimate_transition(x)
        
        with tf.name_scope('Re_estimate_emission'):
            new_emission = self.re_estimate_emission(x)

        with tf.name_scope('Check_Convergence'):
            converged = self.check_convergence(new_T0, new_transition, new_emission)

        with tf.name_scope('Update_parameters'):
            # The attributes are rebound to the outputs of the assign ops, so
            # after this call self.T0 / self.E / self.T refer to the assign
            # results rather than to the original variable objects.
            self.T0 = tf.assign(self.T0, new_T0)
            self.E = tf.assign(self.E, new_emission)
            self.T = tf.assign(self.T, new_transition)
            #self.count = tf.assign_add(self.count, 1)
             
            with tf.name_scope('histogram_summary'):
                _ = tf.histogram_summary(self.T0.name, self.T0)
                _ = tf.histogram_summary(self.T.name, self.T)
                _ = tf.histogram_summary(self.E.name, self.E)
        return converged
  def __init__(self, gan=None, config=None, trainer=None, name="ProgressCompressTrainHook"):
    """Set up the knowledge-base GAN and the ops that copy its layers back.

    Args:
      gan: The outer GAN whose inputs, generator and discriminator are used.
      config: Hook configuration; `config.knowledge_base` configures the
        inner KBGAN.
      trainer: Forwarded to the base class.
      name: Forwarded to the base class.
    """
    super().__init__(config=config, gan=gan, trainer=trainer, name=name)
    d_loss = []

    # Variables shaped like the current input batch and generator sample,
    # plus ops that copy the live tensors into them.
    self.x = tf.Variable(tf.zeros_like(gan.inputs.x))
    self.g = tf.Variable(tf.zeros_like(gan.generator.sample))

    stacked = tf.concat([self.gan.inputs.x, self.gan.generator.sample], axis=0)
    self.assign_x = tf.assign(self.x, gan.inputs.x)
    self.assign_g = tf.assign(self.g, gan.generator.sample)
    self.re_init_d = [d.initializer for d in gan.discriminator.variables()]
    gan.hack = self.g

    self.assign_knowledge_base = []

    bs = gan.batch_size()
    real = gan.discriminator.named_layers['knowledge_base_target']#tf.reshape(gan.loss.sample[:2], [2,-1])
    _inputs = hc.Config({'x':real})
    # The inner GAN takes the discriminator's 'knowledge_base_target' layer
    # as its x and the stacked real/generated batch as its latent.
    inner_gan = KBGAN(config=self.config.knowledge_base, inputs=_inputs, x=real, latent=stacked)
    self.kb_loss = inner_gan.loss
    self.kb = inner_gan.generator
    self.trainer = inner_gan.trainer
    variables = inner_gan.variables()
    #variables += self.kb.variables()

    # For every component that declares a knowledge base, create an op that
    # assigns the matching inner-generator layer to the component's tensor.
    # Note: the loop variable `name` shadows the constructor argument.
    for c in gan.components:
        if hasattr(c, 'knowledge_base'):
            for name, net in c.knowledge_base:
                assign = self.kb.named_layers[name]
                # Trim along dimension 0 when the source has more rows than
                # the target.
                if self.ops.shape(assign)[0] > self.ops.shape(net)[0]:
                    assign = tf.slice(assign,[0 for i in self.ops.shape(net)] , [self.ops.shape(net)[0]]+self.ops.shape(assign)[1:])
                self.assign_knowledge_base.append(tf.assign(net, assign))

    self.gan.add_metric('d_kb', self.kb_loss.sample[0])
    self.gan.add_metric('g_kb', self.kb_loss.sample[1])
def train_spectrogram_encoder():
  """Run a pre-training phase followed by the alternating main training phase.

  Relies on module-level names (`speech`, `x`, `y_`, `sess`, `l_rate`,
  `speech_step`, `encod_step`, `train_step`, `flatten`, `eval`).
  """
  tf.initialize_all_variables().run()
  print("Pretrain")
  for i in range(6000-1):
    batch_xs, batch_ys = speech.train.next_batch(100)
    # Each sample is flattened before feeding; see
    # https://github.com/tensorflow/tensorflow/issues/406
    batch_xs=[flatten(matrix) for matrix in batch_xs]
    feed = {x: batch_xs, y_: batch_ys}
    speech_step.run(feed) # better for encod_entropy too! (later)
    if(i%100==0):
      print("iteration %d"%i)#, end=' ')
      # NOTE(review): presumably a module-level `eval` helper that shadows the
      # builtin - confirm.
      eval(feed)
    # With i < 5999 the value i+1 never reaches 7000, so this branch cannot
    # trigger in this loop.
    if((i+1)%7000==0):
      print("l_rate*=0.1")
      sess.run(tf.assign(l_rate,l_rate*0.1))

  print("Train")
  for i in range(100000):
    batch_xs, batch_ys = speech.train.next_batch(100)
    # NOTE(review): unlike the pre-training loop, the batch is not flattened
    # here - confirm that this is intended.
    feed = {x: batch_xs, y_: batch_ys}
    # Scale the learning rate by 0.3 every 9000 iterations; a new assign op
    # is created each time this fires.
    if((i+1)%9000==0):sess.run(tf.assign(l_rate,l_rate*0.3))
    encod_step.run(feed) # alternating!
    speech_step.run(feed)
    train_step.run(feed)
    if(i%100==0):
      print("iteration %d"%i)#, end=' ')
      eval(feed)
Beispiel #17
0
 def testReuseVars(self):
   """With is_training=False, batch_norm registers no update ops and keeps its moving statistics."""
   height, width = 3, 3
   with self.test_session() as sess:
     image_shape = (10, height, width, 3)
     image_values = np.random.rand(*image_shape)
     # Per-channel statistics: reduce over batch, height and width.
     expected_mean = np.mean(image_values, axis=(0, 1, 2))
     expected_var = np.var(image_values, axis=(0, 1, 2))
     images = tf.constant(image_values, shape=image_shape, dtype=tf.float32)
     output = tf.contrib.layers.batch_norm(images,
                                           decay=0.1,
                                           is_training=False)
     # In inference mode no moving-average update may be registered.
     update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
     self.assertEquals(update_ops, [])
     # Initialize all variables
     sess.run(tf.initialize_all_variables())
     moving_mean = tf.contrib.framework.get_variables(
         'BatchNorm/moving_mean')[0]
     moving_variance = tf.contrib.framework.get_variables(
         'BatchNorm/moving_variance')[0]
     mean, variance = sess.run([moving_mean, moving_variance])
     # After initialization moving_mean == 0 and moving_variance == 1.
     self.assertAllClose(mean, [0] * 3)
     self.assertAllClose(variance, [1] * 3)
     # Simulate assignment from saver restore.
     init_assigns = [tf.assign(moving_mean, expected_mean),
                     tf.assign(moving_variance, expected_var)]
     sess.run(init_assigns)
     for _ in range(10):
       sess.run([output], {images: np.random.rand(*image_shape)})
     mean = moving_mean.eval()
     variance = moving_variance.eval()
     # Although we feed different images, the moving_mean and moving_variance
     # shouldn't change.
     self.assertAllClose(mean, expected_mean)
     self.assertAllClose(variance, expected_var)
Beispiel #18
0
 def fit(self, xs, ys):
     """Fit the regressor to ``(xs, ys)`` and log optimization statistics.

     Optionally refreshes the input and output normalization variables, runs
     the optimizer and records ``LossBefore``, ``LossAfter``, ``dLoss`` and,
     with a trust region, ``MeanKL`` through the tabular logger.

     Args:
         xs: Input samples.
         ys: Targets for ``xs``.
     """
     sess = tf.get_default_session()

     if self._normalize_inputs:
         # Refresh the normalizing constants for the inputs.
         x_mean = np.mean(xs, axis=0, keepdims=True)
         x_std = np.std(xs, axis=0, keepdims=True) + 1e-8
         input_updates = [
             tf.assign(self._x_mean_var, x_mean),
             tf.assign(self._x_std_var, x_std),
         ]
         sess.run(input_updates)

     if self._normalize_outputs:
         # Refresh the normalizing constants for the outputs.
         y_mean = np.mean(ys, axis=0, keepdims=True)
         y_std = np.std(ys, axis=0, keepdims=True) + 1e-8
         output_updates = [
             tf.assign(self._y_mean_var, y_mean),
             tf.assign(self._y_std_var, y_std),
         ]
         sess.run(output_updates)

     inputs = [xs, ys]
     if self._use_trust_region:
         # The trust-region constraint needs the previous distribution.
         old_means, old_log_stds = self._f_pdists(xs)
         inputs = inputs + [old_means, old_log_stds]

     loss_before = self._optimizer.loss(inputs)
     prefix = self._name + "_" if self._name else ""
     logger.record_tabular(prefix + 'LossBefore', loss_before)

     self._optimizer.optimize(inputs)

     loss_after = self._optimizer.loss(inputs)
     logger.record_tabular(prefix + 'LossAfter', loss_after)
     if self._use_trust_region:
         mean_kl = self._optimizer.constraint_val(inputs)
         logger.record_tabular(prefix + 'MeanKL', mean_kl)
     logger.record_tabular(prefix + 'dLoss', loss_before - loss_after)
def parallel_acc_by_tags(model, sess, max_parallel_calcs, data_folder, read_func, from_file=None, data_set="test",
                         feature="images", orientations=None):
    """Evaluate ``model`` over a data set and record the misclassified images.

    Streams batches from ``input_pipeline``, counts the predictions, collects
    the tag and label of every wrong prediction in two graph variables, prints
    the accuracy and writes the misclassified entries to
    ``<data_folder>/incorrect.txt``. The session is closed at the end.

    Args:
        model: Object exposing ``correct_predictions``, ``inputs``, ``testy``
            and ``keep_probs``.
        sess: Session used for every run call; closed before returning.
        max_parallel_calcs: Forwarded to ``input_pipeline``; also used to
            report progress.
        data_folder: Folder the data is read from and ``incorrect.txt`` is
            written to.
        read_func: Converts the raw image batch into model inputs.
        from_file: Forwarded to ``input_pipeline``.
        data_set: Forwarded to ``input_pipeline``.
        feature: Forwarded to ``input_pipeline``.
        orientations: Orientations to evaluate; defaults to
            ``[0, 90, 180, 270]``.
    """
    total_images = 0
    if orientations is None:
        orientations = [0, 90, 180, 270]
    # Fix: the original passed the undefined name `data_folder_loc` here; the
    # folder this function receives is `data_folder`.
    images, labels, tags = input_pipeline(data_folder, max_parallel_calcs, data_set=data_set,
                                          feature=feature, num_images=None,
                                          binary_file=False, orientations=orientations,
                                          from_file=from_file, num_epochs=1)

    # Growing lists kept in graph variables; validate_shape=False lets the
    # assignment change the variable's length.
    incorrect_images_list = tf.Variable([], dtype=tf.string, trainable=False, name="Incorrect_images")
    adder_image_names = tf.placeholder(dtype=tf.string, shape=[None], name="Adder_images")
    new_incorrect_images_list = tf.concat(0, [incorrect_images_list, adder_image_names])
    add_incorrect_images = tf.assign(incorrect_images_list, new_incorrect_images_list, use_locking=True,
                                     validate_shape=False)

    incorrect_labels_list = tf.Variable([], dtype=tf.int32, trainable=False, name="Incorrect_image_labels")
    adder_image_labels = tf.placeholder(dtype=tf.int32, shape=[None], name="Adder_image_labels")
    new_incorrect_labels_list = tf.concat(0, [incorrect_labels_list, adder_image_labels])
    add_incorrect_labels = tf.assign(incorrect_labels_list, new_incorrect_labels_list, use_locking=True,
                                     validate_shape=False)

    init_ops = tf.group(tf.local_variables_initializer(), tf.global_variables_initializer())
    sess.run(init_ops)

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    steps = 0
    try:
        print("Checking Accuracy")
        while not coord.should_stop():
            steps += 1
            raw_imgs_list, labels_list, tags_list = sess.run([images, labels, tags])
            imgs_list = read_func(raw_imgs_list)
            preds = sess.run(model.correct_predictions, feed_dict={model.inputs: imgs_list, model.testy: labels_list,
                                                                   model.keep_probs: 1})
            total_images += len(preds)
            incorrect_indices = np.where(preds == 0)

            # Uses locking so we do not lose any incorrect classifications
            sess.run(add_incorrect_images, feed_dict={adder_image_names: tags_list[incorrect_indices]})
            sess.run(add_incorrect_labels, feed_dict={adder_image_labels: labels_list[incorrect_indices]})

            if steps % 100 == 0:
                print("Calculated " + str(steps*max_parallel_calcs) + " files")
    except tf.errors.OutOfRangeError:
        # The input queue is exhausted after its single epoch.
        print('Done training -- epoch limit reached')
    finally:
        # When done, ask the threads to stop.
        coord.request_stop()
    coord.join(threads)
    inc_name = sess.run(incorrect_images_list)
    inc_label = sess.run(incorrect_labels_list)
    print("Correct classifications: " + str(total_images - len(inc_name)))
    print("Total images: " + str(total_images))
    if total_images:
        print("Accuracy: " + str((total_images - len(inc_name))/total_images))
    else:
        # Avoid a ZeroDivisionError when the pipeline yielded no images.
        print("Accuracy: n/a (no images were evaluated)")
    with open(os.path.join(data_folder, "incorrect.txt"), 'w') as f:
        for name, label in zip(inc_name, inc_label):
            # Each label is multiplied by 90 before being written.
            f.write(os.path.join(data_folder, name.decode('utf-8')) + ', ' + str(label*90) + '\n')
    sess.close()
def style_transfer_train(loss, img_var, initial_lr=3.0, decayed_lr=0.1, decay_lr_at=180, max_iter=200, print_every=50):
    """Optimize ``img_var`` against ``loss`` with Adam and collect snapshots.

    Uses the module-level session ``sess``.

    Args:
        loss: Scalar loss tensor to minimize.
        img_var: Image variable being optimized; only this variable is updated.
        initial_lr: Learning rate at the start.
        decayed_lr: Learning rate assigned at iteration ``decay_lr_at``.
        decay_lr_at: Iteration at which the learning rate is switched; the
            image is clamped after every earlier iteration.
        max_iter: Number of optimization steps.
        print_every: Interval (in steps) for logging and taking a snapshot.

    Returns:
        Tuple ``(imgs_in_process, final_img)``: the snapshots taken every
        ``print_every`` steps and the final image, each being element 0 of the
        evaluated ``img_var``.
    """
    learning_rate = tf.Variable(initial_lr, name="lr")

    # Build the optimizer in its own scope so its variables can be found below.
    with tf.variable_scope("optimizer") as opt_scope:
        adam = tf.train.AdamOptimizer(learning_rate)
        train_op = adam.minimize(loss, var_list=[img_var])

    # Initialize only the learning rate, the image and the optimizer's state.
    optimizer_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=opt_scope.name)
    to_initialize = [learning_rate, img_var] + optimizer_vars
    sess.run(tf.variables_initializer(to_initialize))

    # Op that clamps the image values to [-1.5, 1.5] when run.
    clamp_image_op = tf.assign(img_var, tf.clip_by_value(img_var, -1.5, 1.5))

    snapshots = []

    for t in range(max_iter):
        sess.run(train_op)
        if t < decay_lr_at:
            sess.run(clamp_image_op)
        elif t == decay_lr_at:
            sess.run(tf.assign(learning_rate, decayed_lr))
        if t % print_every == 0:
            print("train step: %d" % t)
            current = sess.run(img_var)
            snapshots.append(current[0])

    print("train step: %d" % t)
    final_img = sess.run(img_var)[0]
    return snapshots, final_img
Beispiel #21
0
def bn_layer(inputs,is_training,name='BatchNorm',moving_decay=0.9,eps=1e-5):
    """Batch-normalization layer for 2-D or 4-D inputs.

    In training mode the input is normalized with the statistics of the
    current batch, which are also stored in the `mean` / `var` variables.
    Otherwise the stored values are used.

    Args:
        inputs: 2-D or 4-D tensor; statistics are taken over every axis except
            the last.
        is_training: Python bool selecting the mode at graph-construction time.
        name: Currently unused.
        moving_decay: Currently unused.
        eps: Small constant added to the variance for numerical stability.

    Returns:
        The normalized tensor, scaled by `gamma` and shifted by `beta`.
    """
    shape = inputs.shape
    assert len(shape) in [2,4]

    param_shape = shape[-1]

    gamma = tf.Variable(tf.ones(param_shape), name='gamma')
    beta  = tf.Variable(tf.zeros(param_shape), name='beta')
    mean  = tf.Variable(tf.ones(param_shape), trainable=False, name='mean')
    var   = tf.Variable(tf.ones(param_shape), trainable=False, name='var')

    # L2 penalties use the module-level regularization strength `lambda1`.
    tf.add_to_collection('l2_losses', tf.contrib.layers.l2_regularizer(lambda1)(gamma))
    tf.add_to_collection('l2_losses', tf.contrib.layers.l2_regularizer(lambda1)(beta))
    tf.add_to_collection('l2_losses', tf.contrib.layers.l2_regularizer(lambda1)(mean))
    tf.add_to_collection('l2_losses', tf.contrib.layers.l2_regularizer(lambda1)(var))

    # The explicit comparison is kept on purpose: anything other than the
    # Python value True selects the inference branch, as before.
    if is_training == True:
        # Reduce over all axes but the last, so the 2-D inputs admitted by the
        # assert work too (the previous hard-coded [0,1,2] only fit 4-D).
        reduce_axes = list(range(len(shape) - 1))
        batch_mean, batch_var = tf.nn.moments(inputs,reduce_axes,name='moments')
        mean = tf.assign(mean, batch_mean)
        var = tf.assign(var, batch_var)
        # Adding a negligible multiple of the assign outputs makes the
        # normalization depend on them, so the stored statistics are refreshed
        # whenever the output is evaluated.
        # Fix: offset/scale are passed by keyword; positionally the signature
        # is (x, mean, variance, offset, scale, eps), so gamma and beta were
        # swapped before.
        return tf.nn.batch_normalization(inputs,batch_mean+mean*1e-10,batch_var+var*1e-10,
                                         offset=beta,scale=gamma,variance_epsilon=eps)
    else:
        return tf.nn.batch_normalization(inputs,mean,var,
                                         offset=beta,scale=gamma,variance_epsilon=eps)
Beispiel #22
0
    def batch_normalization(self, input_data, name, scale_offset=True, relu=False):
        """Add a batch-normalization layer, optionally followed by ReLU.

        Args:
            input_data: Input tensor; in training mode moments are taken over
                axes 0, 1 and 2.
            name: Variable scope of the layer, also used as the op name.
            scale_offset: If True, create "scale" and "offset" variables;
                otherwise both are passed as None.
            relu: If True, apply ReLU to the normalized output.

        Returns:
            The output tensor of the layer.
        """

        with tf.variable_scope(name) as scope:
            # One parameter per entry of the last input dimension.
            shape = [input_data.get_shape()[-1]]
            pop_mean = tf.get_variable("mean", shape, initializer = tf.constant_initializer(0.0), trainable=False)
            pop_var = tf.get_variable("variance", shape, initializer = tf.constant_initializer(1.0), trainable=False)
            epsilon = 1e-4
            decay = 0.999
            if scale_offset:
                scale = tf.get_variable("scale", shape, initializer = tf.constant_initializer(1.0))
                offset = tf.get_variable("offset", shape, initializer = tf.constant_initializer(0.0))
            else:
                scale, offset = (None, None)
            # self.is_training is evaluated in Python here, so the mode is
            # fixed when the graph is built.
            if self.is_training:
                batch_mean, batch_var = tf.nn.moments(input_data, [0, 1, 2])

                # Exponential moving averages of the batch statistics.
                train_mean = tf.assign(pop_mean,
                               pop_mean * decay + batch_mean * (1 - decay))
                train_var = tf.assign(pop_var,
                              pop_var * decay + batch_var * (1 - decay))
                # Evaluating the output forces both averages to be updated.
                with tf.control_dependencies([train_mean, train_var]):
                    output = tf.nn.batch_normalization(input_data,
                    batch_mean, batch_var, offset, scale, epsilon, name = name)
            else:
                output = tf.nn.batch_normalization(input_data,
                pop_mean, pop_var, offset, scale, epsilon, name = name)

            if relu:
                output = tf.nn.relu(output)

            return output
def update_parameters(mu, sigma, best_params):
  """Build ops that refit ``mu`` and ``sigma`` to ``best_params``.

  Args:
    mu: Variable receiving the mean of ``best_params`` over axis 0.
    sigma: Variable receiving the standard deviation of ``best_params`` around
      that mean, over axis 0.
    best_params: Tensor of selected parameter samples.

  Returns:
    Tuple ``(mu_assign_op, sigma_assign_op)``.
  """
  sample_mean = tf.reduce_mean(best_params, 0)
  assign_mu = tf.assign(mu, sample_mean)

  # Root of the mean squared deviation from the new mean.
  squared_dev = tf.squared_difference(best_params, sample_mean)
  sample_std = tf.sqrt(tf.reduce_mean(squared_dev, 0))
  assign_sigma = tf.assign(sigma, sample_std)

  return assign_mu, assign_sigma
Beispiel #24
0
    def run_tf_simulation(self, c_in, h_in, timesteps=100, dt=0.005):
        """Integrate the excitatory/inhibitory rate equations with Euler steps.

        Args:
            c_in: Multiplied element-wise with `h_in` to form the external
                input term.
            h_in: See `c_in`.
            timesteps: Number of Euler steps to run.
            dt: Step size.

        Returns:
            Tuple `(rE, rI)`; each is a one-element list holding the final
            value of the corresponding rate variable.
        """
        # Rate variables, starting at zero.
        r_e = tf.Variable( tf.zeros([self.N_pairs, self.N_pairs]) )
        r_i = tf.Variable( tf.zeros([self.N_pairs, self.N_pairs]) )
        
        # Model parameters are fed in from the instance attributes (see `fd`).
        W_EE = tf.placeholder(tf.float32)
        W_EI = tf.placeholder(tf.float32)
        W_IE = tf.placeholder(tf.float32)
        W_II = tf.placeholder(tf.float32)
        k = tf.placeholder(tf.float32)
        n_E = tf.placeholder(tf.float32)
        n_I = tf.placeholder(tf.float32) 
        tau_E = tf.placeholder(tf.float32)
        tau_I = tf.placeholder(tf.float32)
        
        c0 = tf.constant(c_in)
        h0 = tf.constant(h_in)
                
        # Compile functions:
        # Net input = external drive + excitatory input - inhibitory input.
        # NOTE(review): the reshape target [75,75] is hard-coded while the
        # rate variables use self.N_pairs - presumably N_pairs == 75; confirm.
        I_E = c0*h0 + tf.transpose(tf.reshape(tf.reduce_sum(W_EE * r_e, [1,2]), [75,75])) \
            - tf.transpose(tf.reshape(tf.reduce_sum(W_EI * r_i, [1,2]), [75,75]))
        I_I = c0*h0 + tf.transpose(tf.reshape(tf.reduce_sum(W_IE * r_e, [1,2]), [75,75])) \
            - tf.transpose(tf.reshape(tf.reduce_sum(W_II * r_i, [1,2]), [75,75]))

        # Rectify the inputs, then apply the power-law transfer function.
        I_thresh_E = tf.maximum(0., I_E)
        I_thresh_I = tf.maximum(0., I_I)

        r_SS_E = k * tf.pow(I_thresh_E, n_E)
        r_SS_I = k * tf.pow(I_thresh_I, n_I)

        # One Euler step towards the steady-state rates.
        rE_out = r_e + dt*(-r_e+r_SS_E)/tau_E
        rI_out = r_i + dt*(-r_i+r_SS_I)/tau_I
        
        update_rE = tf.assign(r_e, rE_out)
        update_rI = tf.assign(r_i, rI_out)
        
        init = tf.initialize_all_variables()
        
        rE = 0
        rI = 0
        
        fd = {W_EE:self.W_EE.astype(np.float32), 
                  W_EI:self.W_EI.astype(np.float32), 
                  W_IE:self.W_IE.astype(np.float32), 
                  W_II:self.W_II.astype(np.float32),
                  k:self.k.astype(np.float32),
                  n_E:self.n_E.astype(np.float32),
                  n_I:self.n_I.astype(np.float32),
                  tau_E:self.tau_E.astype(np.float32),
                  tau_I:self.tau_I.astype(np.float32)}
        
        with tf.Session() as sess:
            sess.run(init, feed_dict=fd)
            for t in range(timesteps):
                # run the simulation
                sess.run([update_rE, update_rI], feed_dict=fd)
            # fetch the rates
            rE = sess.run([r_e], feed_dict=fd)
            rI = sess.run([r_i], feed_dict=fd)
            
        return rE, rI
Beispiel #25
0
  def _cached_copy(self, var, name):
    """Create a transient, worker-cached copy of a Variable.

    Args:
      var: A Variable, a non-empty list of Variables, or None. A list with
        several items is concatenated along dimension 0 to form the cached
        value.
      name: Name of the cache variable.

    Returns:
      A `(cache, cache_init, cache_reset)` tuple, or `(None, None, None)` when
      `var` is None:
      cache: the new transient Variable.
      cache_init: op that copies `var` into the cache.
      cache_reset: op that resets the cache to the default value 1.0.
    """
    if var is None:
      return None, None, None

    cache = WALSModel._transient_var(name)
    with ops.colocate_with(cache):
      if isinstance(var, list):
        assert var
        # A single-element list is unwrapped; longer lists are concatenated.
        var = var[0] if len(var) == 1 else tf.concat(0, var)

    # Shape validation is disabled for both assignments.
    cache_init = tf.assign(cache, var, validate_shape=False)
    cache_reset = tf.assign(cache, 1.0, validate_shape=False)
    return cache, cache_init, cache_reset
Beispiel #26
0
 def __init__(self,inputs,size,is_training,sess,parForTarget=None,bn_param=None):
     """Build a batch-normalization layer over ``inputs``.

     Uses the module-level constants ``decay`` and ``TAU``.

     Args:
         inputs: Input tensor; batch moments are taken over axis 0.
         size: Number of features, i.e. the length of every parameter vector.
         is_training: Scalar boolean tensor choosing between batch and
             population statistics.
         sess: Session stored on the instance.
         parForTarget: Optional source layer. When given, soft-update ops are
             created that move this layer's ``scale`` / ``beta`` towards it.
         bn_param: Currently unused.
     """
     self.sess = sess
     self.scale = tf.Variable(tf.random_uniform([size],0.9,1.1))
     self.beta = tf.Variable(tf.random_uniform([size],-0.03,0.03))
     self.pop_mean = tf.Variable(tf.random_uniform([size],-0.03,0.03),trainable=False)
     self.pop_var = tf.Variable(tf.random_uniform([size],0.9,1.1),trainable=False)
     self.batch_mean, self.batch_var = tf.nn.moments(inputs,[0])
     # Moving-average updates of the population statistics. They are exposed
     # as attributes and are not attached to `bnorm`; presumably the caller
     # runs them explicitly during training - confirm.
     self.train_mean = tf.assign(self.pop_mean,self.pop_mean * decay + self.batch_mean * (1 - decay))
     self.train_var = tf.assign(self.pop_var,self.pop_var * decay + self.batch_var * (1 - decay))

     def training():
         return tf.nn.batch_normalization(inputs,
             self.batch_mean, self.batch_var, self.beta, self.scale, 0.0000001 )

     def testing():
         return tf.nn.batch_normalization(inputs,
         self.pop_mean, self.pop_var, self.beta, self.scale, 0.0000001)

     # Identity comparison is the idiomatic None check and cannot be affected
     # by a custom __ne__.
     if parForTarget is not None:
         self.parForTarget = parForTarget
         # Soft update: new = old * (1 - TAU) + source * TAU.
         self.updateScale = self.scale.assign(self.scale*(1-TAU)+self.parForTarget.scale*TAU)
         self.updateBeta = self.beta.assign(self.beta*(1-TAU)+self.parForTarget.beta*TAU)
         self.updateTarget = tf.group(self.updateScale, self.updateBeta)

     self.bnorm = tf.cond(is_training,training,testing)
     
def running_mean(cost, tag_name, batch_size=1):
    """Build ops that keep a running mean of `cost` and summarize it.

    Two scalar local variables are created under the variable scope
    `tag_name`: a float64 running sum and an int32 counter.

    Args:
        cost: Value added to the running sum on each update (cast to float64).
        tag_name: Suffix of the name scope, name of the variable scope and
            tag of the scalar summary.
        batch_size: Amount added to the counter on each update.

    Returns:
        A tuple ``(reset_op, update_op, summary_op)``: the first zeroes both
        accumulators, the second adds `cost` / `batch_size` to them, and the
        third is a scalar summary of sum divided by count.
    """

    def local_scalar(name, dtype):
        # Zero-initialised, non-trainable scalar registered only in the
        # LOCAL_VARIABLES collection.
        return tf.get_variable(
            name,
            shape=(),
            dtype=dtype,
            initializer=tf.zeros_initializer,
            trainable=False,
            collections=[tf.GraphKeys.LOCAL_VARIABLES])

    with tf.name_scope("running_mean_" + tag_name):
        with tf.variable_scope(tag_name):
            total = local_scalar("cost_sum", tf.float64)
            count = local_scalar("cost_num_batches", tf.int32)

        # Accumulate: sum += cost, count += batch_size.
        cost_as_double = tf.cast(cost, dtype=tf.float64)
        add_to_total = tf.assign_add(total, cost_as_double)
        add_to_count = tf.assign_add(count, batch_size)
        accumulate = tf.group(add_to_total, add_to_count)

        # Reset both accumulators to zero.
        zero_count = tf.assign(count, 0)
        zero_total = tf.assign(total, 0.0)
        clear = tf.group(zero_count, zero_total)

        # Mean = sum / count, exported as a scalar summary.
        count_as_double = tf.cast(count, dtype=tf.float64)
        summary = tf.summary.scalar(tag_name, tf.divide(total, count_as_double))

    return clear, accumulate, summary
 def if_train():
     """Return the batch mean/variance after refreshing the moving averages.

     `inputs`, `pop_mean`, `pop_var` and `decay` come from the enclosing scope.
     """
     # Moments over axes 0, 1 and 2 (presumably every axis except channels
     # for a 4-D input -- confirm against the caller).
     mu, sigma2 = tf.nn.moments(inputs, axes=[0, 1, 2])
     # Exponential moving average of the population statistics.
     ema_updates = [
         tf.assign(pop_mean, pop_mean * decay + mu * (1 - decay)),
         tf.assign(pop_var, pop_var * decay + sigma2 * (1 - decay)),
     ]
     # The identity ops force both assignments to run whenever the returned
     # tensors are evaluated.
     with tf.control_dependencies(ema_updates):
         mean_out = tf.identity(mu)
         var_out = tf.identity(sigma2)
     return mean_out, var_out
Beispiel #29
0
 def getUpdatesForBnRollingAverage(self) :
     """Return the assign ops that copy the new BN mean/variance into the shared variables.

     Returns an empty list when batch normalization was not applied in this layer.
     """
     # This function or something similar should stay, even if I clean the BN rolling average.
     if not self._appliedBnInLayer :
         return []

     mu_update = tf.assign( ref=self._sharedNewMu_B, value=self._newMu_B, validate_shape=True )
     var_update = tf.assign( ref=self._sharedNewVar_B, value=self._newVar_B, validate_shape=True )
     return [ mu_update, var_update ]
 def overflow_case():
   """Divide the scale by the step factor, clip it, and record the current iteration.

   Returns the value of `self.scale` read after both assignments have run.
   """
   reduced = self.scale / self.step_factor
   bounded = tf.clip_by_value(reduced, self.scale_min, self.scale_max)
   pending = [
       tf.assign(self.scale, bounded),
       tf.assign(self.last_overflow_iteration, self.iteration),
   ]
   # Reading the scale under these dependencies forces both updates first.
   with tf.control_dependencies(pending):
     updated_scale = tf.identity(self.scale)
   return updated_scale
    def __init__(self, is_training, config, input_):
        """Build the TensorFlow graph for the RNN language model.

        The graph has two parts, created in this order:

        1. A single-step "test" sub-graph whose tensors carry fixed names
           (test_initial_state, test_word_in, test_state_in, test_state_out,
           test_cell_out, test_word_out, test_cell_in, test_out).
        2. The graph unrolled over ``input_.num_steps`` that produces the cost
           and, when ``is_training`` is true, the clipped-gradient momentum
           train op and a learning-rate update op.

        Args:
            is_training: Python bool. Enables dropout (when
                ``config.keep_prob < 1``) and the training ops.
            config: Object providing ``hidden_size``, ``vocab_size``,
                ``keep_prob``, ``num_layers`` and ``max_grad_norm``.
            input_: Object providing ``batch_size``, ``num_steps``,
                ``input_data`` and ``targets``.
        """
        self._input = input_

        batch_size = input_.batch_size
        num_steps = input_.num_steps
        size = config.hidden_size
        vocab_size = config.vocab_size

        def rnn_cell():
            # With the latest TensorFlow source code (as of Mar 27, 2017),
            # the BasicLSTMCell will need a reuse parameter which is unfortunately not
            # defined in TensorFlow 1.0. To maintain backwards compatibility, we add
            # an argument check here:
            # (The cell actually built and inspected below is BasicRNNCell.)
            if 'reuse' in inspect.getargspec(
                    tf.contrib.rnn.BasicRNNCell.__init__).args:
                return tf.contrib.rnn.BasicRNNCell(
                    size, reuse=tf.get_variable_scope().reuse)
            else:
                return tf.contrib.rnn.BasicRNNCell(size)

        # Factory for one layer's cell; replaced by a dropout-wrapped variant
        # below when training with keep_prob < 1.
        attn_cell = rnn_cell

        if is_training and config.keep_prob < 1:

            def attn_cell():
                return tf.contrib.rnn.DropoutWrapper(
                    rnn_cell(), output_keep_prob=config.keep_prob)

        self.cell = tf.contrib.rnn.MultiRNNCell(
            [attn_cell() for _ in range(config.num_layers)],
            state_is_tuple=True)

        # Zero states for the training batch and for a batch of one (test graph).
        self._initial_state = self.cell.zero_state(batch_size, data_type())
        self._initial_state_single = self.cell.zero_state(1, data_type())

        # Batch-of-one zero state stacked to [num_layers, 1, size] and exported
        # under a fixed name.
        self.initial = tf.reshape(tf.stack(axis=0,
                                           values=self._initial_state_single),
                                  [config.num_layers, 1, size],
                                  name="test_initial_state")

        # first implement the less efficient version
        test_word_in = tf.placeholder(tf.int32, [1, 1], name="test_word_in")

        state_placeholder = tf.placeholder(tf.float32,
                                           [config.num_layers, 1, size],
                                           name="test_state_in")
        # unpacking the input state context
        # (one [1, size] tensor per layer, as a tuple for the MultiRNNCell)
        l = tf.unstack(state_placeholder, axis=0)
        test_input_state = tuple([l[idx] for idx in range(config.num_layers)])

        with tf.device("/cpu:0"):
            self.embedding = tf.get_variable("embedding", [vocab_size, size],
                                             dtype=data_type())

            inputs = tf.nn.embedding_lookup(self.embedding, input_.input_data)
            test_inputs = tf.nn.embedding_lookup(self.embedding, test_word_in)

        # test time
        # This call is the first use of the cell inside the "RNN" scope, so it
        # is the one that creates the cell variables.
        with tf.variable_scope("RNN"):
            (test_cell_output,
             test_output_state) = self.cell(test_inputs[:, 0, :],
                                            test_input_state)

        # The Python names below are not used again in this method; the ops are
        # created for their fixed graph names.
        test_state_out = tf.reshape(tf.stack(axis=0, values=test_output_state),
                                    [config.num_layers, 1, size],
                                    name="test_state_out")
        test_cell_out = tf.reshape(test_cell_output, [1, size],
                                   name="test_cell_out")
        # above is the first part of the graph for test
        # test-word-in
        #               > ---- > test-state-out
        # test-state-in        > test-cell-out

        # below is the 2nd part of the graph for test
        # test-word-out
        #               > prob(word | test-word-out)
        # test-cell-in

        test_word_out = tf.placeholder(tf.int32, [1, 1], name="test_word_out")
        cellout_placeholder = tf.placeholder(tf.float32, [1, size],
                                             name="test_cell_in")

        softmax_w = tf.get_variable("softmax_w", [size, vocab_size],
                                    dtype=data_type())
        softmax_b = tf.get_variable("softmax_b", [vocab_size],
                                    dtype=data_type())

        test_logits = tf.matmul(cellout_placeholder, softmax_w) + softmax_b
        # Note: log_softmax, so the exported value is a log-probability.
        test_softmaxed = tf.nn.log_softmax(test_logits)

        p_word = test_softmaxed[0, test_word_out[0, 0]]
        test_out = tf.identity(p_word, name="test_out")

        if is_training and config.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, config.keep_prob)

        # Simplified version of models/tutorials/rnn/rnn.py's rnn().
        # This builds an unrolled LSTM for tutorial purposes only.
        # In general, use the rnn() or state_saving_rnn() from rnn.py.
        #
        # The alternative version of the code below is:
        #
        # inputs = tf.unstack(inputs, num=num_steps, axis=1)
        # outputs, state = tf.contrib.rnn.static_rnn(
        #     cell, inputs, initial_state=self._initial_state)
        outputs = []
        state = self._initial_state
        with tf.variable_scope("RNN"):
            for time_step in range(num_steps):
                # The condition is true for every step, including step 0: the
                # cell variables already exist from the test-time call above,
                # so every unrolled step reuses them.
                if time_step > -1: tf.get_variable_scope().reuse_variables()
                (cell_output, state) = self.cell(inputs[:, time_step, :],
                                                 state)
                outputs.append(cell_output)

        # Flatten [batch, num_steps, size] to [batch * num_steps, size] before
        # the softmax projection.
        output = tf.reshape(tf.stack(axis=1, values=outputs), [-1, size])
        logits = tf.matmul(output, softmax_w) + softmax_b
        loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
            [logits], [tf.reshape(input_.targets, [-1])],
            [tf.ones([batch_size * num_steps], dtype=data_type())])
        # Summed over all positions, divided by batch size only.
        self._cost = cost = tf.reduce_sum(loss) / batch_size
        self._final_state = state

        # Evaluation/test models stop here; no optimizer ops are created.
        if not is_training:
            return

        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          config.max_grad_norm)
        optimizer = tf.train.MomentumOptimizer(self._lr, 0.9)
        self._train_op = optimizer.apply_gradients(
            zip(grads, tvars),
            global_step=tf.contrib.framework.get_or_create_global_step())

        # Feed a value into _new_lr and run _lr_update to change the learning rate.
        self._new_lr = tf.placeholder(tf.float32,
                                      shape=[],
                                      name="new_learning_rate")
        self._lr_update = tf.assign(self._lr, self._new_lr)
Beispiel #32
0
adder_node = a + b

# Build on an existing node: (a + b) * 3.
add_and_triple = adder_node * 3.

# Variables are not like constants: their values can be assigned later.
# They receive their initial value only when the op returned by
# tf.global_variables_initializer() is run in a session.
# dtype must be passed by keyword: the second positional parameter of
# tf.Variable is `trainable`, not `dtype`.
W = tf.Variable([.3], dtype=tf.float32)
bias = tf.Variable([-.3], dtype=tf.float32)
x = tf.placeholder(tf.float32)
linear_model = W * x + bias

# To give a variable a new value, build an assign op; the variable only
# changes when that op is run in a session.
fixW = tf.assign(W, [-1.])
fixb = tf.assign(bias, [1.])

# Loss function: squared difference between the model output and the expected output.
y = tf.placeholder(tf.float32)
# tf.square works element-wise, like np.square.
squared_deltas = tf.square(linear_model - y)
# Sum over all elements. See https://www.tensorflow.org/api_docs/python/tf/reduce_sum
loss = tf.reduce_sum(squared_deltas)

# A session encapsulates the control and state of the TensorFlow runtime.
sess = tf.Session()
print(sess.run([node1, node2]))
print(sess.run([node3]))
print(sess.run(adder_node, {a: 3, b: 4.5}))
print(sess.run(adder_node, {a: [1, 3], b: [2, 4]}))
Beispiel #33
0
 def __init__(self, inputs, outputs, updates=None):
     """Store the inputs/outputs and build one assign op per update pair.

     Args:
         inputs: Iterable of input tensors; copied into a list.
         outputs: Iterable of output tensors; copied into a list.
         updates: Optional iterable of ``(variable, new_value)`` pairs.
             ``None`` (the default) means no updates.
     """
     # A None sentinel replaces the former mutable default ``[]``, which was
     # a single list object shared by every call.
     if updates is None:
         updates = ()
     self.inputs = list(inputs)
     self.outputs = list(outputs)
     # The assign ops are created under a control dependency on the outputs.
     with tf.control_dependencies(self.outputs):
         self.updates = [tf.assign(p, new_p) for (p, new_p) in updates]
Beispiel #34
0
def set_value(x, value):
    """Assign `value` (converted with np.asarray) to the variable `x`.

    A new assign op is built on every call and run in the session returned
    by `_get_session()`.
    """
    new_value = np.asarray(value)
    assign_op = tf.assign(x, new_value).op
    assign_op.run(session=_get_session())
Beispiel #35
0
def factorize(A, hyperparameters):
    """Factorize `A` into two sparsified matrices by gradient descent.

    Two variables V0 (A.shape[0] x 500) and V1 (500 x A.shape[1]) are trained
    so that (V0 + I) @ (V1 + I) approximates `A`. The loss is tf.norm of the
    residual plus an L1 penalty on V0 and V1. After every optimizer step,
    entries whose absolute value does not exceed a threshold are set to zero.

    The default graph is reset at the start, and progress is printed every
    100 iterations.

    Args:
        A: Target matrix; its ``.shape`` is used and it is fed to a float32
            placeholder (presumably a 2-D numpy array -- confirm with callers).
        hyperparameters: Only echoed back in the returned dict. The settings
            actually used are the hard-coded values below.

    Returns:
        A pair ``(factors, all_results)``. ``factors`` is a list with one
        ``(v + eye, nonzero_count_of_v)`` tuple per trained variable;
        ``all_results`` is a dict with keys "hyperparameters",
        "target_matrix" and "results" (a list of
        ``(total_nonzeros, residual_norm)`` sampled every 100 iterations).
    """
    #l1_regularizer_parameter = hyperparameters["l1_regularizer_parameter"]
    #dimension = hyperparameters["dimension"]
    #zero_out_threshold = hyperparameters["zero_out_threshold"]
    #lr = hyperparameters["lr"]
    #niters = hyperparameters["niters"]

    # Hard-coded settings (the lookups above are disabled).
    l1_regularizer_parameter = .0001
    zero_out_thresholds = [1e-7, 1e-7]  # one threshold per factor matrix
    lr = 3e-3
    niters = 10000
    intermediate_dimension = 500

    # Factorize A in to matrices of shape shapes
    tf.reset_default_graph()

    shapes = [(A.shape[0], intermediate_dimension),
              (intermediate_dimension, A.shape[1])]
    variables = []
    for shape in shapes:
        # The standard deviation is itself a tensor: .001 everywhere plus 1
        # on the diagonal.
        variables.append(
            tf.Variable(
                tf.random_normal(shape,
                                 stddev=.001 + tf.eye(*shape),
                                 dtype=tf.float32)))

    # Multiply the variables together
    # Each factor enters the product as (identity + variable).
    to_optimize = variables[0] + tf.eye(
        *tuple(variables[0].get_shape().as_list()))
    for variable in variables[1:]:
        to_optimize = tf.matmul(
            to_optimize,
            tf.eye(*tuple(variable.get_shape().as_list())) + variable)
        #to_optimize = tf.matmul(to_optimize, variable)

    assert (tuple(to_optimize.get_shape().as_list()) == tuple(A.shape))

    # Construct the optimization
    target_placeholder = tf.placeholder(tf.float32, shape=A.shape)
    loss_frobenius_error = tf.norm(target_placeholder - to_optimize)

    # Add l1 loss
    # The regularizer scale is fixed at 1.0; the actual weight is fed at run
    # time through l1_parameter_placeholder.
    l1_parameter_placeholder = tf.placeholder(dtype=tf.float32)
    l1_regularizer = tf.contrib.layers.l1_regularizer(scale=1.0, scope=None)
    regularization_penalty = tf.contrib.layers.apply_regularization(
        l1_regularizer, variables)
    loss = loss_frobenius_error + l1_parameter_placeholder * regularization_penalty
    #loss = loss_frobenius_error

    # Create opt
    lr_placeholder = tf.placeholder(dtype=tf.float32)
    opt = tf.train.GradientDescentOptimizer(learning_rate=lr_placeholder)
    minimize = opt.minimize(loss)

    # Zero out values below absolute threshold
    # The control dependency makes each masking assign run after the
    # optimizer step.
    zero_ops = []
    with tf.control_dependencies([minimize]):
        for matrix, thresh in zip(variables, zero_out_thresholds):
            # mask is 1.0 where |entry| > thresh and 0.0 elsewhere.
            mask = tf.cast(
                tf.greater(tf.abs(matrix),
                           thresh * tf.ones_like(matrix, dtype=tf.float32)),
                tf.float32)
            zero_ops.append(tf.assign(matrix, tf.multiply(mask, matrix)))
        minimize_and_zero_out = tf.group(zero_ops)

    # Do optimization
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    results = []
    for i in range(niters):
        _, loss_materialized, loss_frobenius_error_materialized = sess.run(
            [minimize_and_zero_out, loss, loss_frobenius_error],
            feed_dict={
                target_placeholder: A,
                l1_parameter_placeholder: l1_regularizer_parameter,
                lr_placeholder: lr
            })

        if i % 100 == 0:
            # Also calculate sparsity
            variables_materialized = sess.run(variables)
            total_number_of_nnzs = sum(
                [np.count_nonzero(x) for x in variables_materialized])
            nnzs = [np.count_nonzero(x) for x in variables_materialized]

            print(
                "Loss: %g, Loss_frob_error: %g, # nnzs in factored matrices: %d, nnzs: %s"
                % (loss_materialized, loss_frobenius_error_materialized,
                   total_number_of_nnzs, str(nnzs)))
            results.append(
                (total_number_of_nnzs, loss_frobenius_error_materialized))

    all_results = {
        "hyperparameters": hyperparameters,
        "target_matrix": A,
        "results": results
    }
    vs = sess.run(variables)
    print([np.count_nonzero(v) for v in vs])
    # The returned factors include the identity that was added in the graph;
    # the nonzero counts are those of the raw variables.
    return [(v + np.eye(*v.shape), np.count_nonzero(v))
            for v in vs], all_results
Beispiel #36
0
def sparse_factorize(target_matrix, **hyperparameters):
    """Factorize `target_matrix`; currently delegates to successive_factorization.

    NOTE(review): the first statement returns unconditionally, so everything
    after it is unreachable and `hyperparameters` is ignored. The comments
    below describe what the disabled code would do if the early return were
    removed.

    Args:
        target_matrix: Matrix to factorize, passed straight to
            `successive_factorization`.
        **hyperparameters: Overrides for the defaults listed below (only
            consulted by the unreachable code).

    Returns:
        Whatever `successive_factorization(target_matrix)` returns.
    """
    return successive_factorization(target_matrix)

    # ---- Unreachable from here on (see the note in the docstring). ----
    hyperparameter_defaults = {
        "l1_parameter": 0.05,  # Tune
        "l1_parameter_growth": 1,
        "grow_l1_every_n_iter": 1000,
        "intermediate_dimension": 2000,  # Tune
        "ntrain_iters": 2000,
        "n_matrices_to_factorize_into": 20,  # Tune 
        "lr": 1e-2,
        "lr_decay": .995,
        "decay_lr_every_n_iter": 1000,
        "init_normal_stdev": .01,
        "zero_out_threshold": 8e-3,  # Tune,
        "dense_factorize": False,
        "print_every": 100
    }

    hyperparameters = merge_dict(hyperparameters, hyperparameter_defaults)

    print("sparse_factorize: Using hyperparameters")
    print(hyperparameters)

    M, K = target_matrix.shape
    Z = hyperparameters["intermediate_dimension"]

    if hyperparameters["dense_factorize"]:

        # Factor shapes: the first is M x (Z * (n_mats - 1)), the last is
        # Z x K, and the intermediate ones are Z x (i + 1) * Z, widest first.
        n_mats = hyperparameters["n_matrices_to_factorize_into"]
        first_shape, last_shape = (M, Z * (n_mats - 2 + 1)), (Z, K)
        intermediate_shapes = []
        for i in range(n_mats - 2):
            intermediate_shapes.insert(0, (Z, (i + 1) * Z))

        # Each factor starts as identity plus Gaussian noise.
        to_optimize = [
            tf.Variable(tf.random_normal(
                shp, stddev=hyperparameters["init_normal_stdev"]) +
                        tf.eye(*shp),
                        dtype=tf.float32)
            for shp in [first_shape] + intermediate_shapes + [last_shape]
        ]

        cur_matrix = to_optimize[0]
        for ind, matrix in enumerate(to_optimize[1:]):
            cur_matrix_shape = cur_matrix.get_shape().as_list()
            matrix_shape = matrix.get_shape().as_list()
            print(cur_matrix_shape[1], matrix_shape[0], matrix_shape[1], Z)
            print(ind, len(to_optimize))
            # Every factor except the last gets an identity block stacked on
            # top of it before the multiplication.
            if ind != len(to_optimize[1:]) - 1:
                stacked = tf.concat([tf.eye(matrix_shape[1]), matrix], axis=0)
            else:
                stacked = matrix
            #cur_matrix = tf.matmul(cur_matrix, tf.eye(*tuple(stacked.get_shape().as_list()) + stacked))
            cur_matrix = tf.matmul(cur_matrix, stacked)
    else:
        # Construct graph
        # Construct variables to optimize
        # Chain of shapes: (M, Z), then n_matrices - 2 of (Z, Z), then (Z, K);
        # each starts as identity plus Gaussian noise.
        to_optimize = [
            tf.Variable(tf.random_normal(
                (M, Z), stddev=hyperparameters["init_normal_stdev"]) +
                        tf.eye(M, Z),
                        dtype=tf.float32)
        ] + [
            tf.Variable(tf.random_normal(
                (Z, Z), stddev=hyperparameters["init_normal_stdev"]) +
                        tf.eye(Z),
                        dtype=tf.float32)
            for i in range(hyperparameters["n_matrices_to_factorize_into"] - 2)
        ] + [
            tf.Variable(tf.random_normal(
                (Z, K), stddev=hyperparameters["init_normal_stdev"]) +
                        tf.eye(Z, K),
                        dtype=tf.float32)
        ]

        cur_matrix = to_optimize[0]
        for matrix in to_optimize[1:]:
            cur_matrix = tf.matmul(cur_matrix, matrix)

    # Matrix placeholders
    target_placeholder = tf.placeholder(tf.float32, shape=target_matrix.shape)

    # Create loss
    # Despite the name, this is tf.norm of the residual, not a mean squared error.
    raw_mse_loss = tf.norm(cur_matrix - target_placeholder)

    # Add regularization
    # The regularizer scale is fixed at 1.0; the effective weight is fed at
    # run time through l1_parameter_placeholder.
    loss = raw_mse_loss
    l1_parameter_placeholder = tf.placeholder(dtype=tf.float32)
    l1_regularizer = tf.contrib.layers.l1_regularizer(
        #scale=hyperparameters["l1_parameter"], scope=None
        scale=1.0,
        scope=None)
    regularization_penalty = tf.contrib.layers.apply_regularization(
        l1_regularizer, to_optimize)
    loss += l1_parameter_placeholder * regularization_penalty

    # Create optimizer
    lr_placeholder = tf.placeholder(dtype=tf.float32)
    opt = tf.train.GradientDescentOptimizer(learning_rate=lr_placeholder)
    #opt = tf.train.AdamOptimizer()
    minimize = opt.minimize(loss)

    # 0 out values that are small
    # The masking assigns are created under a control dependency on the
    # optimizer step; `minimize` is then rebound to the group of assigns.
    zero_ops = []
    with tf.control_dependencies([minimize]):
        for matrix in to_optimize:
            # mask is 1.0 where |entry| > zero_out_threshold, else 0.0.
            mask = tf.cast(
                tf.greater(
                    tf.abs(matrix), hyperparameters["zero_out_threshold"] *
                    tf.ones_like(matrix, dtype=tf.float32)), tf.float32)
            zero_ops.append(tf.assign(matrix, tf.multiply(mask, matrix)))

        minimize = tf.group(zero_ops)

    # Train
    cur_learning_rate = hyperparameters["lr"]
    log_data = []
    cur_l1 = hyperparameters["l1_parameter"]

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    for train_iter in range(hyperparameters["ntrain_iters"]):

        # Both schedules also fire at train_iter == 0.
        if train_iter % hyperparameters["decay_lr_every_n_iter"] == 0:
            cur_learning_rate *= hyperparameters["lr_decay"]

        if train_iter % hyperparameters["grow_l1_every_n_iter"] == 0:
            cur_l1 *= hyperparameters["l1_parameter_growth"]

        # NOTE(review): sampled_x is never used after this assignment.
        sampled_x = np.random.randn(target_matrix.shape[1], 1)
        total_loss_materialized, loss_mse_materialized, _ = (sess.run(
            [loss, raw_mse_loss, minimize],
            feed_dict={
                target_placeholder: target_matrix,
                lr_placeholder: cur_learning_rate,
                l1_parameter_placeholder: cur_l1
            }))

        if train_iter % hyperparameters["print_every"] == 0:

            # Compute sum of nnz elements
            n_nnz_elements = 0
            for matrix in to_optimize:
                matrix_materialized = sess.run(matrix)
                n_nnz_elements += np.count_nonzero(matrix_materialized)

            print(
                "Iteration %d of %d, Total Loss (+l1 penalty): %g; MSE loss: %g, nnz: %d"
                % (train_iter, hyperparameters["ntrain_iters"],
                   total_loss_materialized, loss_mse_materialized,
                   n_nnz_elements))

            # NOTE(review): log_data is collected but not part of the
            # returned results dict.
            log_data.append({
                "total_loss": total_loss_materialized,
                "loss_mse": loss_mse_materialized,
                "nnz": n_nnz_elements
            })

    # Compute number of nonzero elements
    n_nnz_elements = 0
    for matrix in to_optimize:
        matrix_materialized = sess.run(matrix)
        n_nnz_elements += np.count_nonzero(matrix_materialized)
    original_nnz_elements = np.count_nonzero(target_matrix)

    # Compute frobenius error
    product_of_matrices = sess.run(cur_matrix)
    frobenius_error = np.linalg.norm(product_of_matrices - target_matrix)
    # Norm of the target itself, printed in the summary for comparison.
    target_matrix_error = np.linalg.norm(target_matrix)

    # Materialize actual matrices
    materialized_factorizations = []
    for matrix in to_optimize:
        matrix_materialized = sess.run(matrix)
        materialized_factorizations.append(matrix_materialized)

    # Return results
    results = {
        "factorized_matrices": materialized_factorizations,
        "product_of_matrices": product_of_matrices,
        "frobenius_error": frobenius_error,
        "original_nnz_elements": original_nnz_elements,
        "target_matrix": target_matrix,
        "n_nnz_elements": n_nnz_elements,
        "hyperparameter_setting": hyperparameters,
    }

    # Print summary
    print("sparse_factorize: Summary")
    print("-------------------------")
    print("original matrix nnz elements: %d" % original_nnz_elements)
    print("nnz elements: %d" % n_nnz_elements)
    print("frobenius error: %g" % frobenius_error)
    print("target matrix norm: %g" % target_matrix_error)
    print("nnzs of factorized matrices: %s" %
          str([np.count_nonzero(x) for x in materialized_factorizations]))
    return results
Beispiel #37
0
def create_model(inputs, targets):
    """Build the generator/discriminator graph, losses and train ops.

    Both the standard GAN losses and the WGAN losses are always constructed;
    the option ``a.wgan`` selects which pair is optimized and which optimizer
    is used (RMSProp for WGAN, Adam otherwise).

    Reads the module-level names ``a`` (options), ``EPS``, ``clip``,
    ``create_generator``, ``create_discriminator`` and ``Model``.

    Args:
        inputs: Input tensor fed to the generator and to both discriminator
            copies.
        targets: Target tensor; its last dimension gives the number of
            generator output channels.

    Returns:
        A ``Model`` holding predictions, exponentially averaged losses, raw
        selected losses, gradient lists, train ops and the global-step
        increment op.
    """
    with tf.variable_scope("generator") as scope:
        out_channels = int(targets.get_shape()[-1])
        outputs = create_generator(inputs, out_channels)

    # create two copies of discriminator, one for real pairs and one for fake pairs
    # they share the same underlying variables
    with tf.name_scope("real_discriminator"):
        with tf.variable_scope("discriminator"):
            # 2x [batch, height, width, channels] => [batch, 30, 30, 1]
            predict_real = create_discriminator(inputs, targets)

    with tf.name_scope("fake_discriminator"):
        with tf.variable_scope("discriminator", reuse=True):
            # 2x [batch, height, width, channels] => [batch, 30, 30, 1]
            predict_fake = create_discriminator(inputs, outputs)

    with tf.name_scope("discriminator_loss"):
        # minimizing -tf.log will try to get inputs to 1
        # predict_real => 1
        # predict_fake => 0
        # The sigmoid is applied here, so the discriminator outputs are
        # treated as logits.
        dloss_GAN = tf.reduce_mean(
            -(tf.log(tf.sigmoid(predict_real) + EPS) +
              tf.log(1 - tf.sigmoid(predict_fake) + EPS)))
        dloss_WGAN = tf.reduce_mean(predict_fake -
                                    predict_real)  #@luyi wgan critic loss
        discrim_loss = tf.identity(dloss_WGAN) if a.wgan else tf.identity(
            dloss_GAN)  #discriminator loss either from wgan or gan

    with tf.name_scope("generator_loss"):
        # predict_fake => 1
        # abs(targets - outputs) => 0
        gloss_GAN = tf.reduce_mean(-tf.log(tf.sigmoid(predict_fake) + EPS))
        gloss_WGAN = tf.reduce_mean(-predict_fake)  #@luyi wgan generator loss
        gen_loss = tf.identity(gloss_WGAN) if a.wgan else tf.identity(
            gloss_GAN)  #@luyi generator loss either from wgan or gan
        gloss_L1 = tf.reduce_mean(tf.abs(targets - outputs))

    with tf.name_scope("discriminator_train"):
        discrim_tvars = [
            var for var in tf.trainable_variables()
            if var.name.startswith("discriminator")
        ]
        print('Discriminator Variables:')
        for var in discrim_tvars:
            print(var.name)
        if not a.wgan:
            discrim_optim = tf.train.AdamOptimizer(a.lr, a.beta1)
        else:
            discrim_optim = tf.train.RMSPropOptimizer(a.lr)
        # NOTE(review): the clipping ops are built, and enforced through the
        # control dependency below, regardless of a.wgan -- confirm this is
        # intended for the non-WGAN setting.
        clipped_var = [
            tf.assign(var, tf.clip_by_value(var, -clip, clip))
            for var in discrim_tvars
        ]  #@luyi wgan clip discriminator variables
        with tf.control_dependencies(clipped_var):  #@luyi clip variables first
            discrim_grads_and_vars = discrim_optim.compute_gradients(
                discrim_loss, var_list=discrim_tvars)
            discrim_train = discrim_optim.apply_gradients(
                discrim_grads_and_vars)

    with tf.name_scope("generator_train"):
        #with tf.control_dependencies([discrim_train]):
        gen_tvars = [
            var for var in tf.trainable_variables()
            if var.name.startswith("generator")
        ]
        print('Generator Variables:')
        for var in gen_tvars:
            print(var.name)
        if not a.wgan:
            gen_optim = tf.train.AdamOptimizer(a.lr, a.beta1)
        else:
            gen_optim = tf.train.RMSPropOptimizer(a.lr)  #@luyi optimizer
        # Generator objective: weighted sum of the adversarial loss and the
        # L1 reconstruction loss.
        gen_grads_and_vars = gen_optim.compute_gradients(
            gen_loss * a.gan_weight + gloss_L1 * a.l1_weight,
            var_list=gen_tvars)
        gen_train = gen_optim.apply_gradients(gen_grads_and_vars)

    # Exponential moving averages of all five losses; the Model below exposes
    # the averaged values, refreshed by running update_losses.
    ema = tf.train.ExponentialMovingAverage(decay=0.99)
    update_losses = ema.apply(
        [dloss_WGAN, gloss_WGAN, dloss_GAN, gloss_GAN, gloss_L1])

    global_step = tf.contrib.framework.get_or_create_global_step()
    incr_global_step = tf.assign(global_step, global_step + 1)

    return Model(predict_real=predict_real,
                 predict_fake=predict_fake,
                 dloss_GAN=ema.average(dloss_GAN),
                 dloss_WGAN=ema.average(dloss_WGAN),
                 discrim_grads_and_vars=discrim_grads_and_vars,
                 gloss_GAN=ema.average(gloss_GAN),
                 gloss_WGAN=ema.average(gloss_WGAN),
                 gloss_L1=ema.average(gloss_L1),
                 gen_grads_and_vars=gen_grads_and_vars,
                 outputs=outputs,
                 update_losses=update_losses,
                 gen_train=gen_train,
                 discrim_train=discrim_train,
                 incr_global_step=incr_global_step,
                 gen_loss=gen_loss,
                 discrim_loss=discrim_loss)
Beispiel #38
0
 # Build the poetry model graph and train it.
 # batch_size and rnn_size are expected to be defined before this point.
 num_layers = 2
 poetrys = Poetry()
 words_size = len(poetrys.word_to_id)
 inputs = tf.placeholder(tf.int32, [batch_size, None])
 targets = tf.placeholder(tf.int32, [batch_size, None])
 keep_prob = tf.placeholder(tf.float32, name='keep_prob')
 model = poetryModel()
 logits, probs, initial_state, last_state = model.create_model(
     inputs, batch_size, rnn_size, words_size, num_layers, True, keep_prob)
 loss = model.loss_model(words_size, targets, logits)
 # The learning rate is a non-trainable variable so it can be set from the session.
 learning_rate = tf.Variable(0.0, trainable=False)
 optimizer = model.optimizer_model(loss, learning_rate)
 saver = tf.train.Saver()
 with tf.Session() as sess:
     sess.run(tf.global_variables_initializer())
     sess.run(tf.assign(learning_rate, 0.002 * 0.97))
     # The RNN state is carried from one batch to the next through the feed dict.
     next_state = sess.run(initial_state)
     step = 0
     while True:
         x_batch, y_batch = poetrys.next_batch(batch_size)
         feed = {
             inputs: x_batch,
             targets: y_batch,
             initial_state: next_state,
             keep_prob: 0.5
         }
         train_loss, _, next_state = sess.run([loss, optimizer, last_state],
                                              feed_dict=feed)
         print("step:%d loss:%f" % (step, train_loss))
         if step > 40000:
             break
         # Advance the counter. Without this the break condition above could
         # never become true and the loop would run forever, always
         # reporting step 0.
         step += 1
 def increment_total_steps(self):
     """Add one to `self.total_steps` by running a freshly built assign op in `self.sess`."""
     bump = tf.assign(self.total_steps, self.total_steps + 1)
     self.sess.run(bump)
Beispiel #40
0
    def build_model(self):
        """Build the recommendation network graph.

        The user representation is the concatenation of the length-averaged
        history embedding and the last-click embedding, passed through batch
        normalization and three dense ReLU layers. Each item embedding is the
        concatenation of its item, brand and msort embeddings.

        When ``self.is_training`` is true, logits are computed only against
        the sampled items in ``self.sub_sample`` and the loss/train ops are
        created. Otherwise logits are computed against every item and
        ``self.output`` holds their softmax.

        Reads ``self.item_count``, ``self.brand_count``, ``self.msort_count``,
        ``self.embedding_size``, ``self.brand_list``, ``self.msort_list`` and
        ``self.is_training``.
        """
        #placeholder
        self.u = tf.placeholder(tf.int32, [
            None,
        ])  # user idx [B]
        self.hist_i = tf.placeholder(tf.int32,
                                     [None, None])  # history click[B, T]
        self.sl = tf.placeholder(tf.int32, [
            None,
        ])  # history len [B]
        self.last = tf.placeholder(tf.int32, [
            None,
        ])  # last click[B]
        self.basic = tf.placeholder(
            tf.float32, [None, None])  #user basic feature[B,basic_size]
        self.sub_sample = tf.placeholder(
            tf.int32,
            [None, None])  # soft layer (pos_clict,neg_list)[B,sub_size]
        self.y = tf.placeholder(tf.float32, [None, None])  # label one hot[B]
        # Learning rate is fed at run time (note the float64 dtype).
        self.lr = tf.placeholder(tf.float64, [])

        #emb variable
        item_emb_w = tf.get_variable("item_emb_w",
                                     [self.item_count, self.embedding_size])
        item_b = tf.get_variable("item_b", [self.item_count],
                                 initializer=tf.constant_initializer(0.0))
        brand_emb_w = tf.get_variable("brand_emb_w",
                                      [self.brand_count, self.embedding_size])
        msort_emb_w = tf.get_variable("msort_emb_w",
                                      [self.msort_count, self.embedding_size])

        # Lookup tables indexed by item id (presumably item id -> brand id and
        # item id -> msort id; confirm against the data pipeline).
        brand_list = tf.convert_to_tensor(self.brand_list, dtype=tf.int32)
        msort_list = tf.convert_to_tensor(self.msort_list, dtype=tf.int32)

        # history sequence
        hist_b = tf.gather(brand_list, self.hist_i)
        hist_m = tf.gather(msort_list, self.hist_i)

        h_emb = tf.concat([
            tf.nn.embedding_lookup(item_emb_w, self.hist_i),
            tf.nn.embedding_lookup(brand_emb_w, hist_b),
            tf.nn.embedding_lookup(msort_emb_w, hist_m)
        ],
                          axis=2)
        # history mask: zero out positions beyond each sequence length
        mask = tf.sequence_mask(self.sl, tf.shape(h_emb)[1],
                                dtype=tf.float32)  #[B,T]
        mask = tf.expand_dims(mask, -1)  #[B,T,1]
        mask = tf.tile(mask, [1, 1, tf.shape(h_emb)[2]])  #[B,T,3*e]

        h_emb *= mask  #[B,T,3*e]
        hist = tf.reduce_sum(h_emb, 1)  #[B,3*e]
        # Average over the true history length.
        # NOTE(review): assumes every entry of self.sl is > 0; a zero length
        # would divide by zero here.
        hist = tf.div(hist,
                      tf.cast(
                          tf.tile(tf.expand_dims(self.sl, 1),
                                  [1, 3 * self.embedding_size]),
                          tf.float32))  #[B,3*e]
        #last
        last_b = tf.gather(brand_list, self.last)
        last_m = tf.gather(msort_list, self.last)
        l_emb = tf.concat([
            tf.nn.embedding_lookup(item_emb_w, self.last),
            tf.nn.embedding_lookup(brand_emb_w, last_b),
            tf.nn.embedding_lookup(msort_emb_w, last_m)
        ],
                          axis=1)
        #net input
        # NOTE(review): the self.u and self.basic placeholders are not used
        # anywhere in this method.
        self.input = tf.concat([hist, l_emb], axis=-1)
        # print('',)

        # dd net
        # NOTE(review): no `training=` argument is passed to
        # batch_normalization; confirm the default mode is the intended one.
        bn = tf.layers.batch_normalization(inputs=self.input, name='b1')
        layer_1 = tf.layers.dense(bn, 1024, activation=tf.nn.relu, name='f1')
        layer_2 = tf.layers.dense(layer_1,
                                  512,
                                  activation=tf.nn.relu,
                                  name='f2')
        # The final width matches the concatenated item embedding (3 * e) so
        # it can be multiplied with the item vectors.
        layer_3 = tf.layers.dense(layer_2,
                                  3 * self.embedding_size,
                                  activation=tf.nn.relu,
                                  name='f3')

        #softmax
        if self.is_training:
            # Sampled softmax: score only the items listed in self.sub_sample.
            sa_b = tf.gather(brand_list, self.sub_sample)
            sa_m = tf.gather(msort_list, self.sub_sample)

            sample_w = tf.concat([
                tf.nn.embedding_lookup(item_emb_w, self.sub_sample),
                tf.nn.embedding_lookup(brand_emb_w, sa_b),
                tf.nn.embedding_lookup(msort_emb_w, sa_m)
            ],
                                 axis=2)  #[B,sample,3*e]
            #sample_w=tf.nn.embedding_lookup(item_emb_w,self.sub_sample)
            sample_b = tf.nn.embedding_lookup(item_b,
                                              self.sub_sample)  #[B,sample]
            user_v = tf.expand_dims(layer_3, 1)  #[B,1,3*e]
            sample_w = tf.transpose(sample_w, perm=[0, 2, 1])  #[B,3*e,sample]
            self.logits = tf.squeeze(tf.matmul(user_v, sample_w),
                                     axis=1) + sample_b

            # Step variable
            self.global_step = tf.Variable(0,
                                           trainable=False,
                                           name='global_step')
            self.global_epoch_step = tf.Variable(0,
                                                 trainable=False,
                                                 name='global_epoch_step')
            self.global_epoch_step_op = tf.assign(self.global_epoch_step,
                                                  self.global_epoch_step + 1)
            # The string literal below is a disabled alternative loss, kept
            # as a no-op expression statement.
            '''
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=self.logits,
                labels=self.y)
           )
        '''
            self.yhat = tf.nn.softmax(self.logits)

            # Hand-written cross entropy; 1e-24 guards against log(0). The
            # mean is taken over every element, not per example.
            self.loss = tf.reduce_mean(-self.y * tf.log(self.yhat + 1e-24))

            # Plain gradient descent with gradients clipped to global norm 5.
            trainable_params = tf.trainable_variables()
            self.opt = tf.train.GradientDescentOptimizer(learning_rate=self.lr)
            gradients = tf.gradients(self.loss, trainable_params)
            clip_gradients, _ = tf.clip_by_global_norm(gradients, 5)
            self.train_op = self.opt.apply_gradients(
                zip(clip_gradients, trainable_params),
                global_step=self.global_step)

        else:
            # Inference: score against the full item table.
            all_emb = tf.concat([
                item_emb_w,
                tf.nn.embedding_lookup(brand_emb_w, brand_list),
                tf.nn.embedding_lookup(msort_emb_w, msort_list)
            ],
                                axis=1)
            self.logits = tf.matmul(layer_3, all_emb,
                                    transpose_b=True) + item_b
            self.output = tf.nn.softmax(self.logits)
Beispiel #41
0
    p1p2_weights = p1_weights * p2_weights
    p1p2_weights_upper = upper(p1p2_weights)
    Kw_xx_upper = K_xx * p1p2_weights_upper
    Kw_xy = K_xy * p1_weights

    mmd = (tf.reduce_sum(Kw_xx_upper) / num_combos_xx +
           tf.reduce_sum(K_yy_upper) / num_combos_yy -
           2 * tf.reduce_mean(Kw_xy))

    return mmd


###############################################################################
# Build model.
# The learning rate is a non-trainable variable; running lr_update halves it,
# never letting it drop below 1e-8.
lr = tf.Variable(learning_rate_init, name='lr', trainable=False)
lr_update = tf.assign(lr, tf.maximum(lr * 0.5, 1e-8), name='lr_update')

# z and x have a fixed batch dimension; z_sample accepts any batch size.
# x_weights carries one weight per row of x.
z = tf.placeholder(tf.float32, shape=[batch_size, noise_dim], name='z')
z_sample = tf.placeholder(tf.float32, shape=[None, noise_dim], name='z_sample')
x = tf.placeholder(tf.float32, shape=[batch_size, data_dim], name='x')
x_weights = tf.placeholder(tf.float32, shape=[batch_size, 1], name='x_weights')

# The first call of each network creates its variables (reuse=False); the
# second call shares them (reuse=True).
g, g_vars = generator(z, reuse=False)
g_sample, _ = generator(z_sample, reuse=True)
d_real, d_logit_real, d_vars = discriminator(x, reuse=False)
d_fake, d_logit_fake, _ = discriminator(g, reuse=True)

# Define losses.
# The generator loss is the weighted MMD between the data batch x and the
# generated batch g.
mmd = compute_mmd_iw_median_of_means(x, g, x_weights)
g_loss = mmd
Beispiel #42
0
print(hypothesis.shape, Y.shape)

# Prediction error; the shapes must match exactly so the subtraction does not
# broadcast.
assert hypothesis.shape.as_list() == Y.shape.as_list()
diff = (hypothesis - Y)

# Back prop (chain rule), written out by hand instead of using an optimizer.
d_l1 = diff
d_b = d_l1
d_w = tf.matmul(tf.transpose(X), d_l1)

print(X, d_l1, d_w)

# Updating network using gradients
# Note: the weight gradient comes from a matmul over the rows of X, while the
# bias gradient is the mean of the error.
learning_rate = 1e-6
step = [
    tf.assign(W, W - learning_rate * d_w),
    tf.assign(b, b - learning_rate * tf.reduce_mean(d_b)),
]

# 7. Running and testing the training process
# NOTE: despite its name, RMSE is the mean of the squared errors; no square
# root is taken.
RMSE = tf.reduce_mean(tf.square((Y - hypothesis)))

sess = tf.InteractiveSession()
init = tf.global_variables_initializer()
sess.run(init)

# Each iteration runs both assign ops and prints their results with the error.
for i in range(10000):
    print(i, sess.run([step, RMSE], feed_dict={X: x_data, Y: y_data}))

print(sess.run(hypothesis, feed_dict={X: x_data}))
def main(args):
    """Copy selected variables from a source checkpoint into a target model.

    The target meta graph (``<args.target>/graph.meta``) is imported, the
    target checkpoint (if one exists) is restored first, and the selected
    variables are then overwritten with the values stored in the source
    checkpoint. Unless ``args.dry`` is set, the result is saved as a new
    checkpoint ``<args.target>/model.ckpt-<step>``.

    Attributes read from ``args``:
        source, sourcecheckpoint: source directory and optional checkpoint
            name (the latest checkpoint is used when the name is None).
        target, targetcheckpoint: same for the target model.
        compare: only print a comparison of both checkpoints, then return.
        variables, scopes: optional selection of the variables to transfer;
            when both are None the selection is derived from the two
            checkpoints.
        reset: set ``global_step`` and ``samples_seen`` to 0 before saving.
        dry: skip saving.
    """
    if args.sourcecheckpoint is None:
        source_ckpt_path = tf.train.latest_checkpoint(args.source)
    else:
        source_ckpt_path = join(args.source, args.sourcecheckpoint)

    if args.targetcheckpoint is None:
        target_ckpt_path = tf.train.latest_checkpoint(args.target)
    else:
        target_ckpt_path = join(args.target, args.targetcheckpoint)

    if args.compare:
        if target_ckpt_path is not None:
            print_compare(source_ckpt_path, target_ckpt_path)
        else:
            # Function-call form: valid on Python 3 and prints the same text
            # on Python 2 (the statement form is a SyntaxError on Python 3).
            print("no target checkpoint present...")
        return  # exit

    graph = tf.Graph()
    with graph.as_default() as g:

        # create dummy data iterator
        tf.data.TFRecordDataset("").make_initializable_iterator()

        graph_path = join(args.target, "graph.meta")
        # import meta graph from target model
        print("importing meta graph {}".format(graph_path))
        tf.train.import_meta_graph(graph_path)

        with tf.Session(graph=g) as sess:
            # Give every variable a value first, so variables that are in
            # neither checkpoint are still initialized when saving.
            sess.run(tf.global_variables_initializer())

            # Saver over all variables of the target graph; used both to
            # restore the target checkpoint and to write the final result.
            targetsaver = tf.train.Saver()

            # if target checkpoint exists restore variables first
            if target_ckpt_path is not None:
                print("target checkpoint {}".format(target_ckpt_path))
                targetsaver.restore(sess, target_ckpt_path)
                print("restoring variables from target checkpoint")

            # Look the counters up by operation name in the imported graph.
            global_step_op = tf.get_default_graph().get_operation_by_name(
                "global_step").outputs[0]
            samples_seen_op = tf.get_default_graph().get_operation_by_name(
                "samples_seen").outputs[0]

            # parse restore variables from --variable flag or --scope flag
            if args.variables is not None:
                vars_dict = get_variable_dictionary_by_variables(
                    args.variables)
            elif args.scopes is not None:
                vars_dict = get_variable_dictionary_by_scope(args.scopes)
            else:
                vars_dict = get_variable_dictionary_by_valid_variables(
                    srccheckpoint=source_ckpt_path,
                    trgcheckpoint=target_ckpt_path)

            for var in sorted(vars_dict.keys()):
                print("restoring variable {}".format(var))

            # A second saver restricted to the selected variables overwrites
            # them with the source checkpoint's values.
            sourcesaver = tf.train.Saver(var_list=vars_dict)

            print("restoring selected variables from source checkpoing {}".
                  format(source_ckpt_path))
            sourcesaver.restore(sess, source_ckpt_path)

            # Read after both restores, so this is the value that will be
            # used as the suffix of the saved checkpoint.
            step = sess.run(global_step_op)

            checkpoint = join(args.target, "model.ckpt")

            if args.reset:
                sess.run([
                    tf.assign(global_step_op, 0),
                    tf.assign(samples_seen_op, 0)
                ])
                step = 0

            if not args.dry:
                print("saving variables to {}".format(checkpoint))
                targetsaver.save(sess, checkpoint, global_step=step)
    grid = 8
    image = image[:h // grid * grid, :w // grid * grid, :]
    mask = mask[:h // grid * grid, :w // grid * grid, :]
    print('Shape of image: {}'.format(image.shape))

    image = np.expand_dims(image, 0)
    mask = np.expand_dims(mask, 0)
    input_image = np.concatenate([image, mask], axis=2)

    sess_config = tf.ConfigProto()
    sess_config.gpu_options.allow_growth = True
    with tf.Session(config=sess_config) as sess:
        input_image = tf.constant(input_image, dtype=tf.float32)
        output = model.build_server_graph(input_image, config=config)
        output = (output + 1.) * 127.5
        output = tf.reverse(output, [-1])
        output = tf.saturate_cast(output, tf.uint8)
        # load pretrained model
        vars_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        assign_ops = []
        for var in vars_list:
            vname = var.name
            from_name = vname
            var_value = tf.contrib.framework.load_variable(
                args.checkpoint_dir, from_name)
            assign_ops.append(tf.assign(var, var_value))
        sess.run(assign_ops)
        print('Model loaded.')
        result = sess.run(output)
        cv2.imwrite(args.output, result[0][:, :, ::-1])
    def __init__(self, args, session, updates=None):
        """Build the complete training/inference graph of the model.

        Creates the step counter, the input placeholders, the learning-rate
        schedule, one network tower per device, the averaged and clipped
        gradient update, the output ops (`prob`, `pred`, `loss`), the merged
        summary and the checkpoint saver.

        :param args: configuration object; the attributes read here are
            output_dir, multi_gpu, lr, min_lr, lr_warmup_steps,
            lr_decay_rate, lr_decay_steps, beta1, beta2, grad_clipping and
            max_checkpoints (it is also passed to Network).
        :param session: session object, stored as `self.sess`.
        :param updates: initial value of the step counter; a falsy value
            (None or 0) means 0.
        """
        self.args = args
        self.sess = session

        # updates
        if not updates:
            updates = 0
        self.updates = updates
        # The step counter is stored as float32; it is used directly in the
        # learning-rate arithmetic below.
        self.global_step = tf.get_variable(
            'global_step',
            shape=(),
            dtype=tf.float32,
            initializer=tf.constant_initializer(updates),
            trainable=False)
        self.step = tf.assign_add(self.global_step, 1)

        # placeholders
        # Vocabulary lookup table read from <output_dir>/vocab.txt; keys that
        # are not found map to Vocab.unk().
        table = HashTable(TextFileIdTableInitializer(
            filename=os.path.join(args.output_dir, 'vocab.txt')),
                          default_value=Vocab.unk())
        self.q1_string = tf.placeholder(tf.string, [None, None], name='q1_str')
        self.q2_string = tf.placeholder(tf.string, [None, None], name='q2_str')
        # q1/q2 can be fed directly; when they are not fed they default to
        # the table lookup of the corresponding string placeholder.
        self.q1 = tf.placeholder_with_default(table.lookup(self.q1_string),
                                              [None, None],
                                              name='q1')
        self.q2 = tf.placeholder_with_default(table.lookup(self.q2_string),
                                              [None, None],
                                              name='q2')
        self.q1_len = tf.placeholder(tf.int32, [None], name='q1_len')
        self.q2_len = tf.placeholder(tf.int32, [None], name='q2_len')
        self.y = tf.placeholder(tf.int32, [None], name='y')
        self.dropout_keep_prob = tf.placeholder(tf.float32, (),
                                                name='dropout_keep_prob')
        self.batchsize_a = tf.shape(self.q1_len)[0]
        self.batchsize_b = tf.shape(self.q2_len)[0]
        # Both masks use the same length: the longest sequence of either side.
        self.maxlen = tf.reduce_max(self.q1_len)
        self.maxlen = tf.maximum(self.maxlen, tf.reduce_max(self.q2_len))
        # Float masks built from the lengths, with a trailing axis of size 1.
        q1_mask = tf.expand_dims(tf.sequence_mask(self.q1_len,
                                                  maxlen=self.maxlen,
                                                  dtype=tf.float32),
                                 dim=-1)
        q2_mask = tf.expand_dims(tf.sequence_mask(self.q2_len,
                                                  maxlen=self.maxlen,
                                                  dtype=tf.float32),
                                 dim=-1)
        # Fall back to the CPU when no GPU is reported; without multi_gpu only
        # the first device is used.
        devices = self.get_available_gpus() or ['/device:CPU:0']
        if not args.multi_gpu:
            devices = devices[:1]
        if len(devices) == 1:
            splits = 1
        else:
            # Equal-sized chunks for all devices but the last, which takes
            # whatever remains (-1).
            splits = [tf.shape(self.q1)[0] // len(devices)
                      ] * (len(devices) - 1) + [-1]  # handle uneven split

        # Per-device slices of every input along axis 0.
        q1 = tf.split(self.q1, splits)
        q2 = tf.split(self.q2, splits)
        q1_mask = tf.split(q1_mask, splits)
        q2_mask = tf.split(q2_mask, splits)
        y = tf.split(self.y, splits)

        # network
        self.network = Network(args)

        # optimizer
        lr = tf.get_variable('lr',
                             shape=(),
                             dtype=tf.float32,
                             initializer=tf.constant_initializer(args.lr),
                             trainable=False)
        # Schedule: linear warm-up from min_lr towards lr while
        # global_step < lr_warmup_steps, afterwards
        # lr * lr_decay_rate ** floor((step - warmup) / lr_decay_steps),
        # never below min_lr.
        lr_next = tf.cond(self.global_step < args.lr_warmup_steps,
                          true_fn=lambda: args.min_lr +
                          (args.lr - args.min_lr) / max(
                              1, args.lr_warmup_steps) * self.global_step,
                          false_fn=lambda: tf.maximum(
                              args.min_lr,
                              args.lr * args.lr_decay_rate**tf.floor(
                                  (self.global_step - args.lr_warmup_steps
                                   ) / args.lr_decay_steps)))
        # Registered as an update op: the train op below is built under a
        # control dependency on the UPDATE_OPS collection.
        tf.add_to_collection(tf.GraphKeys.UPDATE_OPS,
                             tf.assign(lr, lr_next, name='update_lr'))
        self.lr = lr
        self.opt = tf.train.AdamOptimizer(learning_rate=lr,
                                          beta1=args.beta1,
                                          beta2=args.beta2)

        # training graph
        # With a single device the name scope is '' (no tower prefix).
        tower_names = ['tower-{}'.format(i) for i in range(len(devices))
                       ] if len(devices) > 1 else ['']
        tower_logits = []
        tower_grads = []
        summaries = []
        loss = 0

        with tf.variable_scope(tf.get_variable_scope()):
            for i, device in enumerate(devices):
                with tf.device(device):
                    with tf.name_scope(tower_names[i]) as scope:
                        logits = self.network(q1[i],
                                              q2[i],
                                              q1_mask[i],
                                              q2_mask[i],
                                              self.dropout_keep_prob,
                                              batchsize_a=self.batchsize_a,
                                              batchsize_b=self.batchsize_b)
                        tower_logits.append(logits)
                        # NOTE(review): `loss` and `summaries` are reassigned
                        # on every iteration, so after the loop they hold the
                        # values of the last tower only.
                        loss = self.get_loss(logits, y[i])
                        # All towers after the first reuse the variables.
                        tf.get_variable_scope().reuse_variables()
                        summaries = tf.get_collection(tf.GraphKeys.SUMMARIES,
                                                      scope)
                        grads = self.opt.compute_gradients(loss)
                        tower_grads.append(grads)
        # Average the gradient of each variable across towers.
        gradients = []
        variables = []
        for grad_and_vars in zip(*tower_grads):
            # Only the first tower's gradient is checked for None; such
            # variables are reported on stderr and left out of the update.
            if grad_and_vars[0][0] is None:
                msg = 'WARNING: trainable variable {} receives no grad.\n'.format(
                    grad_and_vars[0][1].op.name)
                sys.stderr.write(msg)
                continue
            grad = tf.stack([g for g, _ in grad_and_vars])
            grad = tf.reduce_mean(grad, 0)
            v = grad_and_vars[0][
                1]  # use the first tower's pointer to the (shared) variable
            gradients.append(grad)
            variables.append(v)

        # Clip by global norm; the norm itself is kept for the summary.
        gradients, self.gnorm = tf.clip_by_global_norm(gradients,
                                                       self.args.grad_clipping)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            self.train_op = self.opt.apply_gradients(zip(gradients, variables))
        # Outputs: tower logits are concatenated along axis 0.
        logits = tf.concat(tower_logits, 0)
        self.prob = tf.nn.softmax(logits, dim=1, name='prob')
        self.pred = tf.argmax(input=logits, axis=1, name='pred')
        self.loss = tf.identity(loss, name='loss')
        summaries.append(tf.summary.scalar('training/lr', lr))
        summaries.append(tf.summary.scalar('training/gnorm', self.gnorm))
        summaries.append(tf.summary.scalar('training/loss', self.loss))

        # add summary
        self.summary = tf.summary.merge(summaries)

        # saver
        # Variables whose name contains 'Adam' are excluded from checkpoints.
        self.saver = tf.train.Saver(
            [var for var in tf.global_variables() if 'Adam' not in var.name],
            max_to_keep=args.max_checkpoints)
def main(_):
  """Train the `FLAGS.model_architecture` model and periodically test it.

  Builds the graph (model, cross-entropy loss, Adam train step, accuracy and
  confusion-matrix ops, summaries), optionally resumes from
  `FLAGS.start_checkpoint`, then runs the training loop. Every
  `FLAGS.eval_step_interval` steps the test set is evaluated twice (with
  `istrain` False and True) and the best accuracies are tracked. A checkpoint
  is written every `FLAGS.save_step_interval` steps, and the best results are
  written to `details.txt` after the last step.
  """
  # Best test accuracy seen so far (and the step it occurred at), tracked
  # separately for the istrain=False and istrain=True evaluations.
  best_acc = 0
  best_step = 0
  best_acc_istrain = 0
  best_step_istrain = 0
  # We want to see all the logging messages for this tutorial.
  tf.logging.set_verbosity(tf.logging.INFO)

  # Start a new TensorFlow session.
  sess = tf.InteractiveSession()

  # Begin by making sure we have the training data we need. If you already have
  # training data of your own, use `--data_url= ` on the command line to avoid
  # downloading.
  model_settings = models.prepare_model_settings(
      len(new_features_input.prepare_words_list_my(FLAGS.wanted_words.split(','))),
      FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
      FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
  audio_processor = new_features_input.AudioProcessor(
      FLAGS.data_dir, FLAGS.silence_percentage,
      FLAGS.wanted_words.split(','), FLAGS.validation_percentage,
      FLAGS.testing_percentage)
  fingerprint_size = model_settings['fingerprint_size']
  label_count = model_settings['label_count']

  # Both flags are comma-separated lists and must have the same length.
  training_steps_list = list(map(int, FLAGS.how_many_training_steps.split(',')))
  learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
  if len(training_steps_list) != len(learning_rates_list):
    raise Exception(
        '--how_many_training_steps and --learning_rate must be equal length '
        'lists, but are %d and %d long instead' % (len(training_steps_list),
                                                   len(learning_rates_list)))
  ##############################################
  ############ tensorflow modules ##########

  fingerprint_input = tf.placeholder(
      tf.float32, [None, fingerprint_size], name='fingerprint_input')

  # ############ model creation ##########
  # `istrain` is fed per run, so the same graph serves training and testing.
  istrain = tf.placeholder(tf.bool, name='istrain')
  logits= models.create_model(
      fingerprint_input,
      model_settings,
      FLAGS.model_architecture,
      is_training=istrain)
  ############ model creation ##########
  # logits, dropout_prob= models.create_model(
  #     fingerprint_input,
  #     model_settings,
  #     FLAGS.model_architecture,
  #     is_training=True)
  # Define loss and optimizer

  ############ ground truth ##########
  ground_truth_input = tf.placeholder(
      tf.float32, [None, label_count], name='groundtruth_input')

  # Optionally we can add runtime checks to spot when NaNs or other symptoms of
  # numerical errors start occurring during training.
  # NOTE(review): `control_dependencies` is only referenced by the
  # commented-out 'train' scope further down, so the checks built here are not
  # attached to the active train step.
  control_dependencies = []
  if FLAGS.check_nans:
    checks = tf.add_check_numerics_ops()
    control_dependencies = [checks]

  # Create the back propagation and training evaluation machinery in the graph.
  ############ cross-entropy computation ##########
  # with tf.name_scope('cross_entropy'):
  #   cross_entropy_mean = tf.reduce_mean(
  #       tf.nn.softmax_cross_entropy_with_logits(
  #           labels=ground_truth_input, logits=logits)) + beta*loss_norm
  with tf.name_scope('cross_entropy'):
    cross_entropy_mean = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            labels=ground_truth_input, logits=logits))
  tf.summary.scalar('cross_entropy', cross_entropy_mean)

  ############ learning rate, accuracy, confusion matrix ##########
  # learning_rate_input    learning-rate input (tf.placeholder)
  # train_step             training step (optimizer)
  # predicted_indices      predicted output indices
  # expected_indices       expected (ground-truth) output indices
  # correct_prediction     element-wise correct-prediction tensor
  # confusion_matrix       confusion matrix
  # evaluation_step        fraction classified correctly (per run)
  # global_step            global training step
  # increment_global_step  op that increments the global training step

  learning_rate_input = tf.placeholder(
      tf.float32, [], name='learning_rate_input')
  # Ops in the UPDATE_OPS collection run before every train step.
  update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
  with tf.control_dependencies(update_ops):
    train_step = tf.train.AdamOptimizer(
        learning_rate_input).minimize(cross_entropy_mean)
  # with tf.name_scope('train'), tf.control_dependencies(control_dependencies):
  #   learning_rate_input = tf.placeholder(
  #       tf.float32, [], name='learning_rate_input')
  #  # train_step = tf.train.GradientDescentOptimizer(
  #     #  learning_rate_input).minimize(cross_entropy_mean)
  #   with tf.control_dependencies(update_ops):
  #       train_step = tf.train.AdamOptimizer(
  #           learning_rate_input).minimize(cross_entropy_mean)
  predicted_indices = tf.argmax(logits, 1)
  expected_indices = tf.argmax(ground_truth_input, 1)
  correct_prediction = tf.equal(predicted_indices, expected_indices)
  confusion_matrix = tf.confusion_matrix(
      expected_indices, predicted_indices, num_classes=label_count)
  evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
  acc = tf.summary.scalar('accuracy', evaluation_step)

  global_step = tf.train.get_or_create_global_step()
  increment_global_step = tf.assign(global_step, global_step + 1)


  saver = tf.train.Saver(tf.global_variables(),max_to_keep=None)# max_to_keep=None keeps every checkpoint (the default would be 5)

  # Merge all the summaries and write them out to /tmp/retrain_logs (by default)
  merged_summaries = tf.summary.merge_all()
  # NOTE(review): `validation_merged_summaries` is not used by the code
  # visible in this function.
  validation_merged_summaries = tf.summary.merge([tf.get_collection(tf.GraphKeys.SUMMARIES,'accuracy'),tf.get_collection(tf.GraphKeys.SUMMARIES,'cross_entropy')])
  test_summaries = tf.summary.merge([acc])
  test_summaries_istrain = tf.summary.merge([tf.get_collection(tf.GraphKeys.SUMMARIES,'accuracy'),tf.get_collection(tf.GraphKeys.SUMMARIES,'cross_entropy')])

  #test_summaries_istrain = tf.summary.merge([acc])
  train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train',
                                       sess.graph)
  # validation_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/validation')
  test_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/test')
  test_istrain_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/test_istrain')
  tf.global_variables_initializer().run()

  start_step = 1

  # Resume: load the variables and continue from the stored global step.
  if FLAGS.start_checkpoint:
    models.load_variables_from_checkpoint(sess, FLAGS.start_checkpoint)
    start_step = global_step.eval(session=sess)

  tf.logging.info('Training from step: %d ', start_step)

  # Save graph.pbtxt.
  tf.train.write_graph(sess.graph_def, FLAGS.train_dir,
                       FLAGS.model_architecture + '.pbtxt')

  # Save list of words.
  with gfile.GFile(
      os.path.join(FLAGS.train_dir, FLAGS.model_architecture + '_labels.txt'),
      'w') as f:
    f.write('\n'.join(audio_processor.words_list))
  ###
  # model1: fc
  # model2: conv: 940k parameters
  # model3: low_latancy_conv: ~~model1
  # model4: 750k
  # Training loop.
  #############################################
  ########            main loop           ######
  #############################################
  training_steps_max = np.sum(training_steps_list)
  for training_step in xrange(start_step, training_steps_max + 1):
    # Figure out what the current learning rate is.
    #######       automatic learning-rate schedule      #######
    # Up to step 12000 the first configured rate is scaled by
    # 0.02 ** (step / 12000); afterwards it stays at 0.02 times that rate.
    # Only learning_rates_list[0] is used by this schedule.
    if training_step <12000+1:
        learning_rate_value = learning_rates_list[0]*0.02**(training_step/12000)
    else:
        learning_rate_value = learning_rates_list[0]*0.02    # previously tried: 0.015, 12000
    # Leftover from the commented-out per-stage schedule below; unused.
    training_steps_sum = 0
    # for i in range(len(training_steps_list)):
    #   training_steps_sum += training_steps_list[i]
    #   if training_step <= training_steps_sum:
    #     learning_rate_value = learning_rates_list[i]
    #     break

    # Pull the audio samples we'll use for training.
    #######       the audio processor loads the data      ##################
    ##get_data(self, how_many, offset, model_settings, background_frequency,
    ##         background_volume_range, time_shift, mode, sess)
    ########################################################################
    train_fingerprints, train_ground_truth = audio_processor.get_data_my(
        FLAGS.batch_size, 0, model_settings ,'training')
    #mid = np.abs(np.max(train_fingerprints) + np.min(train_fingerprints)) / 2
    #half = np.max(train_fingerprints) - np.min(train_fingerprints)
    #train_fingerprints = ((train_fingerprints + mid) / half * 255).astype(int)
    # Augmentation: the output of mixup_data is appended to the original
    # batch and the rows are shuffled. The index list below has
    # 2 * FLAGS.batch_size entries, i.e. it presumes mixup_data returns as
    # many rows as it receives - confirm against mixup_data.
    train_fingerprints_mix, train_ground_truth_mix = mixup_data(train_fingerprints, train_ground_truth, 1)
    train_fingerprints = np.append(train_fingerprints, train_fingerprints_mix, axis=0)
    train_ground_truth = np.append(train_ground_truth, train_ground_truth_mix, axis=0)
    random_index = list(np.arange(FLAGS.batch_size*2))
    np.random.shuffle(random_index)
    train_fingerprints = train_fingerprints[random_index, :]
    train_ground_truth = train_ground_truth[random_index, :]

    # One optimisation step; the global step is incremented in the same run.
    train_summary, train_accuracy, cross_entropy_value, _, _ = sess.run(
        [
            merged_summaries, evaluation_step, cross_entropy_mean, train_step,
            increment_global_step
        ],
        feed_dict={
            fingerprint_input: train_fingerprints,
            ground_truth_input: train_ground_truth,
            learning_rate_input: learning_rate_value,
            istrain:True
        })
    train_writer.add_summary(train_summary, training_step)
    tf.logging.info('Step #%d: rate %f, accuracy %.1f%%, cross entropy %f' %
                    (training_step, learning_rate_value, train_accuracy * 100,
                     cross_entropy_value))
    is_last_step = (training_step == training_steps_max)
    if (training_step % FLAGS.eval_step_interval) == 0 or is_last_step:

      #############################################
      ######## accuracy and confusion matrix on the test set ######
      set_size = audio_processor.set_size('testing')
      tf.logging.info('set_size=%d', set_size)
      test_fingerprints, test_ground_truth = audio_processor.get_data_my(
        -1, 0, model_settings,'testing')
      #mid = np.abs(np.max(test_fingerprints) + np.min(test_fingerprints)) / 2
      #half = np.max(test_fingerprints) - np.min(test_fingerprints)
      #test_fingerprints = ((test_fingerprints + mid) / half * 255).astype(int)
      # The same test data is evaluated twice: once with istrain=False and
      # once with istrain=True.
      final_summary,test_accuracy, conf_matrix = sess.run(
          [test_summaries,evaluation_step, confusion_matrix],
          feed_dict={
              fingerprint_input: test_fingerprints,
              ground_truth_input: test_ground_truth,
              istrain : False
          })
      final_summary_istrain,test_accuracy_istrain= sess.run(
          [test_summaries_istrain,evaluation_step],
          feed_dict={
              fingerprint_input: test_fingerprints,
              ground_truth_input: test_ground_truth,
              istrain : True
          })

      if test_accuracy > best_acc:
          best_acc = test_accuracy
          best_step = training_step
      if test_accuracy_istrain > best_acc_istrain:
          best_acc_istrain = test_accuracy_istrain
          best_step_istrain = training_step
      test_writer.add_summary(final_summary, training_step)
      test_istrain_writer.add_summary(final_summary_istrain, training_step)
      tf.logging.info('Confusion Matrix:\n %s' % (conf_matrix))
      # NOTE(review): N is the hard-coded literal 6882 in the messages below,
      # not the `set_size` queried above - confirm they agree.
      tf.logging.info('test accuracy = %.1f%% (N=%d)' % (test_accuracy * 100,6882))
      tf.logging.info('test_istrain accuracy = %.1f%% (N=%d)' % (test_accuracy_istrain * 100,6882))

      tf.logging.info('Best test accuracy before now = %.1f%% (N=%d)' % (best_acc * 100,6882) + '  at step of ' + str(best_step))
      tf.logging.info('Best test_istrain accuracy before now = %.1f%% (N=%d)' % (best_acc_istrain * 100,6882) + '  at step of ' + str(best_step_istrain))
    # Save the model checkpoint periodically.
    if (training_step % FLAGS.save_step_interval == 0 or
        training_step == training_steps_max):
      checkpoint_path = os.path.join(FLAGS.train_dir + '/'+FLAGS.model_architecture,
                                     FLAGS.model_architecture + '.ckpt')
      tf.logging.info('Saving to "%s-%d"', checkpoint_path, training_step)
      saver.save(sess, checkpoint_path, global_step=training_step)
    # The summary text is rebuilt on every step but only written to
    # details.txt after the final step.
    print_line = 'Best test accuracy before now = %.1f%% (N=%d)' % (best_acc * 100,6882) + '  at step of ' + str(best_step) + '\n' + \
                 'Best test_istrain accuracy before now = %.1f%% (N=%d)' % (best_acc_istrain * 100,6882) + '  at step of ' + str(best_step_istrain)
    if training_step == training_steps_max:
        with open(FLAGS.train_dir + '/' +FLAGS.model_architecture+ '/details.txt', 'w') as f:
            f.write(print_line)
    def __init__(self, g, K, T, seed=None, Var_bds=None):
        """
        Define symbolic BFE and auxiliary objective expression to be optimized by tensorflow, given a factor graph.
        We'll use the one default tensorflow computation graph; to make sure we don't redefine it, everytime it'll
        be cleared/reset whenever a new instance of OneShot is created.

        Besides the parameters below, this method reads the names `dtype`, `init_grid` and `init_grid_noise`, which
        are not defined inside it and must be provided by the enclosing scope.

        As a side effect, each rv in g.Vd / g.Vc gets a `belief_params_` dict of symbolic belief parameters, and
        every local name of this method (parameters included) is stored as an attribute of the instance by the
        final statement.
        :param g: a grounded graph corresponding to a plain old PGM; its factors must have .log_potential_fun callable
        on tf tensors
        :param K: num mixture comps
        :param T: num quad points
        :param seed: optional tensorflow graph-level random seed, set after the default graph has been reset
        :param Var_bds: [lb, ub] on the variance param of Gaussian rvs; defaults to [5e-3, 10] when None
        """
        # convert potentials to log_potential_funs (b/c typically caller only sets potentials instead of log pot)
        # utils.set_log_potential_funs(g.factors_list)
        # NOTE(review): the check iterates g.factors while the grouping below uses g.factors_list - confirm both
        # refer to the same collection of factors.
        assert all([callable(f.log_potential_fun) for f in g.factors
                    ]), 'factors must have valid log_potential_fun'

        # group factors together whose log potential functions have the same call signatures
        factors_with_unique_nb_domain_types, unique_nb_domain_types = \
            utils.get_unique_subsets(g.factors_list, lambda f: f.nb_domain_types)
        print('number of unique factor domain types =',
              len(unique_nb_domain_types))
        print(unique_nb_domain_types)

        g.init_rv_indices()  # will create attributes like Vc, Vc_idx, etc.
        # g.init_nb()  # caller should have always run this (or done sth similar) to ensure g is well defined!

        tf.reset_default_graph()  # clear existing
        if seed is not None:  # note that seed that has been set prior to tf.reset_default_graph will be invalidated
            tf.set_random_seed(
                seed)  # thus we have to reseed after reset_default_graph
        # Mixture weights are parameterized by logits that start at zero, i.e. uniform weights after the softmax.
        zeros_K = tf.zeros(K, dtype=dtype)
        tau = tf.Variable(zeros_K, trainable=True,
                          name='tau')  # mixture weights logits
        # tau = tf.Variable(tf.random_normal([K], dtype=dtype), trainable=True, name='tau')  # mixture weights logits
        w = tf.nn.softmax(tau, name='w')  # mixture weights
        fix_mix_op = tf.assign(
            tau, zeros_K)  # op that resets mixing weights to uniform

        # Both objectives are accumulated term by term below.
        bfe = aux_obj = 0
        if g.Nd > 0:
            # common_dstates is the shared number of states when all discrete rvs agree on it, otherwise -1.
            common_dstates = set(rv.dstates for rv in g.Vd)
            if len(common_dstates) == 1:
                common_dstates = common_dstates.pop()
            else:
                common_dstates = -1

            if common_dstates > 0:  # all discrete rvs have the same number of states
                # Rho = tf.Variable(tf.zeros([g.Nd, K, common_dstates], dtype=dtype), trainable=True,
                #                   name='Rho')  # dnode categorical prob logits
                Rho = tf.Variable(tf.random_normal([g.Nd, K, common_dstates],
                                                   dtype=dtype),
                                  trainable=True,
                                  name='Rho')  # dnode categorical prob logits
                Pi = tf.nn.softmax(Rho, name='Pi')
            else:  # general case when each dnode can have different num states
                # Rho = [tf.Variable(tf.zeros([K, rv.dstates], dtype=dtype), trainable=True, name='Rho_%d' % i) for
                #        (i, rv) in enumerate(g.Vd)]  # dnode categorical prob logits
                Rho = [
                    tf.Variable(tf.random_normal([K, rv.dstates], dtype=dtype),
                                trainable=True,
                                name='Rho_%d' % i)
                    for (i, rv) in enumerate(g.Vd)
                ]  # dnode categorical prob logits
                Pi = [
                    tf.nn.softmax(rho, name='Pi_%d' % i)
                    for (i, rho) in enumerate(Rho)
                ]  # convert to probs

            # assign symbolic belief vars to rvs
            for rv in g.Vd:
                i = g.Vd_idx[rv]  # ith disc node
                rv.belief_params_ = {'pi': Pi[i]}  # K x dstates[i] matrix

            # get discrete nodes' contributions to the objective
            if common_dstates > 0:  # all discrete rvs have the same number of states
                sharing_counts = [
                    rv.sharing_count for rv in g.Vd
                ]  # for lifting/param sharing; 1s if no lifting
                delta_bfe, delta_aux_obj = drvs_bfe_obj(
                    rvs=g.Vd, w=w, Pi=Pi, rvs_counts=sharing_counts)
                bfe += delta_bfe
                aux_obj += delta_aux_obj
            else:
                # One rv at a time, each contribution scaled by its sharing count.
                for rv in g.Vd:
                    delta_bfe, delta_aux_obj = drv_bfe_obj(rv, w)
                    sharing_count = rv.sharing_count
                    bfe += sharing_count * delta_bfe
                    aux_obj += sharing_count * delta_aux_obj

        clip_op = tf.no_op()  # will be replaced with real clip op if Nc > 0
        if g.Nc > 0:  # assuming Gaussian
            if Var_bds is None:
                Var_bds = [5e-3, 10]  # currently shared by all cnodes

            # Bounds on the means are taken from the first two entries of each rv's `values`.
            Mu_bds = np.empty([2, g.Nc], dtype='float')
            for n, rv in enumerate(g.Vc):
                Mu_bds[:, n] = rv.values[0], rv.values[1]  # lb, ub
            Mu_bds = Mu_bds[:, :, None] + \
                     np.zeros([2, g.Nc, K], dtype='float')  # Mu_bds[0], Mu_bds[1] give lb, ub for Mu; same for all K
            Mu = np.random.uniform(low=Mu_bds[0],
                                   high=Mu_bds[1],
                                   size=[g.Nc, K])  # init numerical value
            if init_grid:  # try spreading initial means evenly on a grid within the Mu_bds box set
                # NOTE(review): int() truncates, so a floating-point root that lands slightly below an exact
                # integer yields one fewer point per dimension than expected - confirm this is acceptable.
                I = int(K**(
                    1 / g.Nc
                ))  # number of points per dimension; need to have I^{Nc} <= K
                slices = []
                for n, rv in enumerate(g.Vc):
                    lb, ub = rv.values[0], rv.values[1]
                    step = (ub - lb) / (I + 1)
                    slices.append(slice(lb + step, ub,
                                        step))  # no boundary points included
                grid = np.mgrid[slices]  # Nc x I x I x .. x I (Nc many Is)
                num_grid_points = int(I**g.Nc)
                grid = np.reshape(grid, [g.Nc, num_grid_points])
                # Jitter the grid points with Gaussian noise scaled by init_grid_noise.
                grid += init_grid_noise * np.random.randn(*grid.shape)
                Mu[:, :
                   num_grid_points] = grid  # the rest have already been initialized

            # From here on Mu is the trainable tf variable, initialized with the numpy values built above.
            Mu = tf.Variable(Mu, dtype=dtype, trainable=True, name='Mu')

            # optimize the log of Var (sigma squared), for numeric stability
            lVar_bds = np.log(Var_bds)
            # lVar = tf.Variable(np.log(np.random.uniform(low=Var_bds[0], high=Var_bds[1], size=[g.Nc, K])),
            #                    dtype=dtype, trainable=True, name='lVar')
            lVar = tf.Variable(np.random.uniform(low=lVar_bds[0],
                                                 high=lVar_bds[1],
                                                 size=[g.Nc, K]),
                               dtype=dtype,
                               trainable=True,
                               name='lVar')
            Var = tf.exp(lVar)

            # Running clip_op projects Mu and lVar back into their bounds.
            clip_op = tf.group(
                tf.assign(Mu, tf.clip_by_value(Mu, *Mu_bds)),
                tf.assign(lVar, tf.clip_by_value(lVar, *lVar_bds)))

            # assign symbolic belief vars to rvs; the *_K1 entries are the same quantities reshaped to [K, 1]
            for rv in g.Vc:
                i = g.Vc_idx[rv]  # ith cont node
                rv.belief_params_ = {
                    'mu': Mu[i],
                    'var': Var[i],
                    'var_inv': 1 / Var[i],
                    'mu_K1': tf.reshape(Mu[i], [K, 1]),
                    'var_K1': tf.reshape(Var[i], [K, 1]),
                    'var_inv_K1': tf.reshape(1 / Var[i], [K, 1])
                }

            # get continuous nodes' contribution to the objectives (assuming all Gaussian for now)
            sharing_counts = [rv.sharing_count for rv in g.Vc
                              ]  # for lifting/param sharing; 1s if no lifting
            delta_bfe, delta_aux_obj = crvs_bfe_obj(rvs=g.Vc,
                                                    T=T,
                                                    w=w,
                                                    Mu=Mu,
                                                    Var=Var,
                                                    rvs_counts=sharing_counts)
            bfe += delta_bfe
            aux_obj += delta_aux_obj

        # Factor contributions: each group is handled in one call; the first factor of the group decides whether
        # the discrete or the continuous/hybrid routine is used.
        for factors in factors_with_unique_nb_domain_types:
            factor = factors[0]
            if factor.domain_type == 'd':
                delta_bfe, delta_aux_obj = dfactors_bfe_obj(factors, w)
            else:
                assert factor.domain_type in ('c', 'h')
                delta_bfe, delta_aux_obj = hfactors_bfe_obj(factors,
                                                            T,
                                                            w,
                                                            dtype=dtype)
            bfe += delta_bfe
            aux_obj += delta_aux_obj

        # Export every local name (bfe, aux_obj, w, Mu, clip_op, the parameters, ...) as an instance attribute.
        # Renaming a local variable in this method therefore changes the attributes of the object.
        self.__dict__.update(**locals())
Beispiel #48
0
 def update_epoch(self, epoch, sess):
     """Run `self.assign_handler`, then store `int(epoch)` in `self.now_epoch`.

     The placeholder/assign pair used to write the epoch is built on first
     use and cached on the instance. Calling `tf.assign` on every invocation
     would add a new constant and assign op to the graph each time, so the
     graph would grow without bound and the call would fail once the graph
     has been finalized.

     :param epoch: new epoch number; converted with `int()`.
     :param sess: session used to run both ops.
     """
     sess.run(self.assign_handler)

     cached = getattr(self, '_epoch_assign', None)
     # (Re)build the ops only when missing or when they belong to a graph
     # other than the one this session executes.
     if cached is None or cached[0].graph is not sess.graph:
         with sess.graph.as_default():
             value_ph = tf.placeholder(self.now_epoch.dtype.base_dtype,
                                       shape=[],
                                       name='epoch_value')
             cached = (tf.assign(self.now_epoch, value_ph), value_ph)
         self._epoch_assign = cached

     assign_op, value_ph = cached
     sess.run(assign_op, feed_dict={value_ph: int(epoch)})
 def no_return_assign(self, ref, value):
     """Create an assign op writing `value` to `ref` and return the constant 0.

     The op returned by `tf.assign` is not run here and its handle is not
     returned to the caller.
     """
     _assign_op = tf.assign(ref, value)  # handle is discarded
     return 0
def main(_):
  """Train the path-based model with early stopping and save path embeddings.

  Configuration is read from the module-level `FLAGS` (`dataset_dir`,
  `dataset`, `corpus`, `embeddings_base_path`, `logdir`). The function:

    1. reads `classes.txt` and sets `hparams.num_classes`;
    2. loads the gzip-compressed train/val/test TFRecord files into memory;
    3. builds a training model fed from a shuffled input queue and a
       validation model (variables reused through the 'lexnet' scope) fed
       from a string placeholder;
    4. trains epoch by epoch inside a `tf.train.Supervisor` managed session,
       keeping the best validation F1 in a non-trainable variable and
       stopping once F1 drops more than 0.08 below that best value;
    5. computes path embeddings for all instances and saves them.

  Args:
    _: Unused positional argument.
  """
  # Pick up any one-off hyper-parameters.
  hparams = path_model.PathBasedModel.default_hparams()

  # Set the number of classes
  classes_filename = os.path.join(
      FLAGS.dataset_dir, FLAGS.dataset, 'classes.txt')

  with open(classes_filename) as f_in:
    classes = f_in.read().splitlines()

  hparams.num_classes = len(classes)
  print('Model will predict into %d classes' % hparams.num_classes)

  # Get the datasets
  train_set, val_set, test_set = (
      os.path.join(
          FLAGS.dataset_dir, FLAGS.dataset, FLAGS.corpus,
          filename + '.tfrecs.gz')
      for filename in ['train', 'val', 'test'])

  print('Running with hyper-parameters: {}'.format(hparams))

  # Load the instances (every serialized record is kept in memory).
  print('Loading instances...')
  opts = tf.python_io.TFRecordOptions(
      compression_type=tf.python_io.TFRecordCompressionType.GZIP)
  train_instances = list(tf.python_io.tf_record_iterator(train_set, opts))
  val_instances = list(tf.python_io.tf_record_iterator(val_set, opts))
  test_instances = list(tf.python_io.tf_record_iterator(test_set, opts))

  # Load the word embeddings
  print('Loading word embeddings...')
  lemma_embeddings = lexnet_common.load_word_embeddings(
      FLAGS.embeddings_base_path, hparams.lemma_embeddings_file)

  # Define the graph and the model
  with tf.Graph().as_default():
    with tf.variable_scope('lexnet'):
      # The training model reads single, shuffled records straight from the
      # training file through an input queue.
      options = tf.python_io.TFRecordOptions(
          compression_type=tf.python_io.TFRecordCompressionType.GZIP)
      reader = tf.TFRecordReader(options=options)
      _, train_instance = reader.read(
          tf.train.string_input_producer([train_set]))
      shuffled_train_instance = tf.train.shuffle_batch(
          [train_instance],
          batch_size=1,
          num_threads=1,
          capacity=len(train_instances),
          min_after_dequeue=100,
      )[0]

      train_model = path_model.PathBasedModel(
          hparams, lemma_embeddings, shuffled_train_instance)

    # Same scope with reuse=True: the validation model reuses the variables
    # created above and is fed through a placeholder instead of the queue.
    with tf.variable_scope('lexnet', reuse=True):
      val_instance = tf.placeholder(dtype=tf.string)
      val_model = path_model.PathBasedModel(
          hparams, lemma_embeddings, val_instance)

    # Initialize a session and start training
    logdir = (
        '{logdir}/results/{dataset}/path/{corpus}/supervisor.logdir'.format(
            logdir=FLAGS.logdir, dataset=FLAGS.dataset, corpus=FLAGS.corpus))

    # The best F1 lives in a graph variable and is updated through a
    # placeholder-fed assign op built once, outside the training loop.
    best_model_saver = tf.train.Saver()
    f1_t = tf.placeholder(tf.float32)
    best_f1_t = tf.Variable(0.0, trainable=False, name='best_f1')
    assign_best_f1_op = tf.assign(best_f1_t, f1_t)

    supervisor = tf.train.Supervisor(
        logdir=logdir,
        global_step=train_model.global_step)

    with supervisor.managed_session() as session:
      # Load the labels
      print('Loading labels...')
      val_labels = train_model.load_labels(session, val_instances)

      save_path = '{logdir}/results/{dataset}/path/{corpus}/'.format(
          logdir=FLAGS.logdir,
          dataset=FLAGS.dataset,
          corpus=FLAGS.corpus)

      # Train the model
      print('Training the model...')

      while True:
        # Derive the epoch from the global step (ceiling division by the
        # number of training instances).
        step = session.run(train_model.global_step)
        epoch = (step + len(train_instances) - 1) // len(train_instances)
        if epoch > hparams.num_epochs:
          break

        print('Starting epoch %d (step %d)...' % (1 + epoch, step))

        epoch_loss = train_model.run_one_epoch(session, len(train_instances))

        best_f1 = session.run(best_f1_t)
        f1 = epoch_completed(val_model, session, epoch, epoch_loss,
                             val_instances, val_labels, best_model_saver,
                             save_path, best_f1)

        if f1 > best_f1:
          session.run(assign_best_f1_op, {f1_t: f1})

        # Early stopping: validation F1 fell clearly below the best so far.
        if f1 < best_f1 - 0.08:
          # Fixed: the original called the non-existent `tf.logging.fino`,
          # which raised AttributeError instead of logging and stopping.
          tf.logging.info('Stopping training after %d epochs.\n' % epoch)
          break

      # Print the best performance on the validation set
      best_f1 = session.run(best_f1_t)
      print('Best performance on the validation set: F1=%.3f' % best_f1)

      # Save the path embeddings
      print('Computing the path embeddings...')
      instances = train_instances + val_instances + test_instances
      path_index, path_vectors = path_model.compute_path_embeddings(
          val_model, session, instances)
      path_emb_dir = '{dir}/path_embeddings/{dataset}/{corpus}/'.format(
          dir=FLAGS.embeddings_base_path,
          dataset=FLAGS.dataset,
          corpus=FLAGS.corpus)

      if not os.path.exists(path_emb_dir):
        os.makedirs(path_emb_dir)

      path_model.save_path_embeddings(
          val_model, path_vectors, path_index, path_emb_dir)
Beispiel #51
0
    def body(depth_index, state1, state2, state3, depth_image, max_prob_image,
             exp_sum, incre):
        """Process one depth hypothesis and update the running best depth.

        Reads several names from the enclosing scope (`ref_tower`,
        `view_towers`, `view_homographies`, `conv_gru1..3`, `feature_shape`,
        `inverse_depth`, `depth_start`, `depth_end`, `depth_num`,
        `depth_interval`, `FLAGS`).

        Args:
            depth_index: Index of the depth hypothesis handled in this step.
            state1, state2, state3: States of the three stacked conv-GRU cells.
            depth_image: Target of `tf.assign` holding the best depth so far.
            max_prob_image: Target of `tf.assign` holding the highest `prob`
                seen so far.
            exp_sum: Target of `tf.assign_add` accumulating `prob`.
            incre: Amount added to `depth_index` for the next step.

        Returns:
            The same eight values, in the same order, updated for the next
            step.
        """

        # calculate cost
        # Accumulate the sum and the sum of squares of the reference feature
        # and every warped source-view feature at this depth.
        ave_feature = ref_tower.get_output()
        ave_feature2 = tf.square(ref_tower.get_output())
        for view in range(0, FLAGS.view_num - 1):
            homographies = view_homographies[view]
            # Swap the first two axes so that indexing by depth_index picks
            # the homographies of the current depth hypothesis.
            homographies = tf.transpose(homographies, perm=[1, 0, 2, 3])
            homography = homographies[depth_index]
            # warped_view_feature = homography_warping(view_towers[view].get_output(), homography)
            warped_view_feature = tf_transform_homography(
                view_towers[view].get_output(), homography)
            ave_feature = ave_feature + warped_view_feature
            ave_feature2 = ave_feature2 + tf.square(warped_view_feature)
        ave_feature = ave_feature / FLAGS.view_num
        ave_feature2 = ave_feature2 / FLAGS.view_num
        # Variance across views: mean of squares minus square of the mean.
        cost = ave_feature2 - tf.square(ave_feature)
        cost.set_shape(
            [FLAGS.batch_size, feature_shape[1], feature_shape[2], 32])

        # gru
        # The negated cost goes through the three stacked conv-GRU cells; a
        # final 3x3 convolution with one output channel follows.
        reg_cost1, state1 = conv_gru1(-cost, state1, scope='conv_gru1')
        reg_cost2, state2 = conv_gru2(reg_cost1, state2, scope='conv_gru2')
        reg_cost3, state3 = conv_gru3(reg_cost2, state3, scope='conv_gru3')
        reg_cost = tf.layers.conv2d(reg_cost3,
                                    1,
                                    3,
                                    padding='same',
                                    reuse=tf.AUTO_REUSE,
                                    name='prob_conv')
        # Unnormalized score for this depth; exp_sum accumulates these values.
        prob = tf.exp(reg_cost)

        # index
        # Convert the depth index to a depth value, sampled uniformly either
        # in inverse depth or in depth.
        d_idx = tf.cast(depth_index, tf.float32)
        if inverse_depth:
            inv_depth_start = tf.div(1.0, depth_start)
            inv_depth_end = tf.div(1.0, depth_end)
            inv_interval = (inv_depth_start - inv_depth_end) / (
                tf.cast(depth_num, 'float32') - 1)
            inv_depth = inv_depth_start - d_idx * inv_interval
            depth = tf.div(1.0, inv_depth)
        else:
            depth = depth_start + d_idx * depth_interval
        # Broadcast the per-sample depth value over the spatial dimensions.
        temp_depth_image = tf.reshape(depth, [FLAGS.batch_size, 1, 1, 1])
        temp_depth_image = tf.tile(temp_depth_image,
                                   [1, feature_shape[1], feature_shape[2], 1])

        # update the best
        # update_flag_image is 1.0 where this depth beats the best score so
        # far and 0.0 elsewhere; it blends new and previous values.
        update_flag_image = tf.cast(tf.less(max_prob_image, prob),
                                    dtype='float32')
        new_max_prob_image = update_flag_image * prob + (
            1 - update_flag_image) * max_prob_image
        new_depth_image = update_flag_image * temp_depth_image + (
            1 - update_flag_image) * depth_image
        max_prob_image = tf.assign(max_prob_image, new_max_prob_image)
        depth_image = tf.assign(depth_image, new_depth_image)

        # update counter
        exp_sum = tf.assign_add(exp_sum, prob)
        depth_index = tf.add(depth_index, incre)

        return depth_index, state1, state2, state3, depth_image, max_prob_image, exp_sum, incre
Beispiel #52
0
        
        # Graph inputs: an extra float input of shape [1] plus the batch
        # tensors and the feed-dict builder returned by the helper.
        additional_inputs_tf = tf.placeholder(tf.float32, [1])
        batch_tf, input_dict_func = helper.tf_batch_and_input_dict(batch, additional_inputs_tf)
        train_outs_dict, test_outs_dict = model.inference(batch_tf, additional_inputs_tf)
        generative_dict = model.generative_model(batch_tf)
        inference_obs_dist = model.obs_dist

        # Split the trainable variables by name: anything containing
        # 'Discriminator' belongs to the discriminator, the rest to the generator.
        discriminator_vars = [v for v in tf.trainable_variables() if 'Discriminator' in v.name]
        generator_vars = [v for v in tf.trainable_variables() if 'Discriminator' not in v.name] 

        # Weight clipping
        # max_abs_discriminator_vars is the largest absolute discriminator
        # weight; it is not used again within this fragment.
        discriminator_vars_flat_concat = tf.concat([tf.reshape(e, [-1]) for e in discriminator_vars], axis=0)
        max_abs_discriminator_vars = tf.reduce_max(tf.abs(discriminator_vars_flat_concat))
        # One assign op per discriminator variable, clipping it to [-0.01, 0.01].
        clip_op_list = []
        for e in discriminator_vars:
            clip_op_list.append(tf.assign(e, tf.clip_by_value(e, -0.01, 0.01)))

    # Build separate generator and discriminator training steps, each limited
    # to its own variable list.
    # NOTE(review): both minimize() calls pass the same global_step; also, any
    # optimizer_class other than 'RmsProp' or 'Adam' leaves both train steps
    # undefined -- confirm this is intended.
    if global_args.optimizer_class == 'RmsProp':
        train_generator_step_tf = tf.train.RMSPropOptimizer(learning_rate=global_args.learning_rate, 
            momentum=0.9).minimize(train_outs_dict['generator_cost'], var_list=generator_vars, global_step=global_step)
        train_discriminator_step_tf = tf.train.RMSPropOptimizer(learning_rate=global_args.learning_rate, 
            momentum=0.9).minimize(train_outs_dict['discriminator_cost'], var_list=discriminator_vars, global_step=global_step)
    elif global_args.optimizer_class == 'Adam':
        # The Adam branch uses a fixed learning rate of 0.0001 rather than
        # global_args.learning_rate.
        train_generator_step_tf = tf.train.AdamOptimizer(learning_rate=0.0001, 
            beta1=0.5, beta2=0.999, epsilon=1e-08).minimize(train_outs_dict['generator_cost'], var_list=generator_vars, global_step=global_step)
        train_discriminator_step_tf = tf.train.AdamOptimizer(learning_rate=0.0001, 
            beta1=0.5, beta2=0.999, epsilon=1e-08).minimize(train_outs_dict['discriminator_cost'], var_list=discriminator_vars, global_step=global_step)

    # Register summaries for both costs and create the variable initializer.
    helper.variable_summaries(train_outs_dict['generator_cost'], '/generator_cost')
    helper.variable_summaries(train_outs_dict['discriminator_cost'], '/discriminator_cost')
    init = tf.global_variables_initializer()
Beispiel #53
0
def inference_mem(images,
                  cams,
                  depth_num,
                  depth_start,
                  depth_interval,
                  is_master_gpu=True):
    """Infer a depth image from multi-view images and cameras.

    The per-depth cost is accumulated in local (non-trainable) variables via
    `tf.assign` / `tf.assign_add` inside a `tf.while_loop` over the source
    views, instead of stacking all warped features at once.

    Args:
        images: Rank-5 tensor; axis 1 indexes the views (view 0 is the
            reference) and the first 3 channels of the last axis are used for
            the reference image.
        cams: Rank-5 tensor sliced per view with size [-1, 1, 2, 4, 4].
        depth_num: Number of depth hypotheses. Must be a Python int because
            it drives `range(depth_num)`.
        depth_start: Per-sample first depth value (indexed as
            `depth_start[i]`).
        depth_interval: Spacing between consecutive depth hypotheses.
        is_master_gpu: When true the network variables are created
            (`reuse=False`); otherwise existing variables are reused.

    Returns:
        A pair `(estimated_depth_map, prob_map)`: the soft-argmin depth map
        (with a trailing axis added) and the map returned by
        `get_propability_map`.
    """

    # dynamic gpu params
    depth_end = depth_start + (tf.cast(depth_num, tf.float32) -
                               1) * depth_interval
    feature_c = 32
    # Floor division keeps the feature size an int; true division would give
    # a float under Python 3, which is not a valid dimension for tf.zeros.
    feature_h = FLAGS.max_h // 4
    feature_w = FLAGS.max_w // 4

    # reference image
    ref_image = tf.squeeze(tf.slice(images, [0, 0, 0, 0, 0],
                                    [-1, 1, -1, -1, 3]),
                           axis=1)
    ref_cam = tf.squeeze(tf.slice(cams, [0, 0, 0, 0, 0], [-1, 1, 2, 4, 4]),
                         axis=1)

    # image feature extraction
    # Only the master GPU creates the variables; every other tower reuses them.
    ref_tower = UNetDS2GN({'data': ref_image},
                          is_training=True,
                          reuse=not is_master_gpu)
    ref_feature = ref_tower.get_output()
    ref_feature2 = tf.square(ref_feature)

    view_features = []
    for view in range(1, FLAGS.view_num):
        view_image = tf.squeeze(tf.slice(images, [0, view, 0, 0, 0],
                                         [-1, 1, -1, -1, -1]),
                                axis=1)
        view_tower = UNetDS2GN({'data': view_image},
                               is_training=True,
                               reuse=True)
        view_features.append(view_tower.get_output())
    view_features = tf.stack(view_features, axis=0)

    # get all homographies
    view_homographies = []
    for view in range(1, FLAGS.view_num):
        view_cam = tf.squeeze(tf.slice(cams, [0, view, 0, 0, 0],
                                       [-1, 1, 2, 4, 4]),
                              axis=1)
        homographies = get_homographies(ref_cam,
                                        view_cam,
                                        depth_num=depth_num,
                                        depth_start=depth_start,
                                        depth_interval=depth_interval)
        view_homographies.append(homographies)
    view_homographies = tf.stack(view_homographies, axis=0)

    # build cost volume by differentiable homography
    with tf.name_scope('cost_volume_homography'):
        depth_costs = []

        for d in range(depth_num):
            # compute cost (standard deviation feature)
            # Running sum and running sum of squares live in local variables
            # that are reset to the reference feature for each depth.
            ave_feature = tf.Variable(
                tf.zeros([FLAGS.batch_size, feature_h, feature_w, feature_c]),
                name='ave',
                trainable=False,
                collections=[tf.GraphKeys.LOCAL_VARIABLES])
            ave_feature2 = tf.Variable(
                tf.zeros([FLAGS.batch_size, feature_h, feature_w, feature_c]),
                name='ave2',
                trainable=False,
                collections=[tf.GraphKeys.LOCAL_VARIABLES])
            ave_feature = tf.assign(ave_feature, ref_feature)
            ave_feature2 = tf.assign(ave_feature2, ref_feature2)

            def body(view, ave_feature, ave_feature2):
                """Add one warped source view to the running sums."""
                homography = tf.slice(view_homographies[view],
                                      begin=[0, d, 0, 0],
                                      size=[-1, 1, 3, 3])
                homography = tf.squeeze(homography, axis=1)
                # warped_view_feature = homography_warping(view_features[view], homography)
                warped_view_feature = tf_transform_homography(
                    view_features[view], homography)
                ave_feature = tf.assign_add(ave_feature, warped_view_feature)
                ave_feature2 = tf.assign_add(ave_feature2,
                                             tf.square(warped_view_feature))
                view = tf.add(view, 1)
                return view, ave_feature, ave_feature2

            view = tf.constant(0)
            cond = lambda view, *_: tf.less(view, FLAGS.view_num - 1)
            _, ave_feature, ave_feature2 = tf.while_loop(
                cond,
                body, [view, ave_feature, ave_feature2],
                back_prop=False,
                parallel_iterations=1)

            # Variance across views: E[x^2] - (E[x])^2, where the squared
            # mean is (sum)^2 / view_num^2.
            ave_feature = tf.assign(
                ave_feature,
                tf.square(ave_feature) / (FLAGS.view_num * FLAGS.view_num))
            ave_feature2 = tf.assign(
                ave_feature2, ave_feature2 / FLAGS.view_num - ave_feature)
            depth_costs.append(ave_feature2)
        cost_volume = tf.stack(depth_costs, axis=1)

    # filtered cost volume, size of (B, D, H, W, 1)
    filtered_cost_volume_tower = RegNetUS0({'data': cost_volume},
                                           is_training=True,
                                           reuse=not is_master_gpu)
    filtered_cost_volume = tf.squeeze(filtered_cost_volume_tower.get_output(),
                                      axis=-1)

    # depth map by softArgmin
    with tf.name_scope('soft_arg_min'):
        # probability volume by soft max
        probability_volume = tf.nn.softmax(tf.scalar_mul(
            -1, filtered_cost_volume),
                                           axis=1,
                                           name='prob_volume')

        # depth image by soft argmin
        # Expected depth: per-sample depth values weighted by the
        # probability volume and summed over the depth axis.
        volume_shape = tf.shape(probability_volume)
        soft_2d = []
        for i in range(FLAGS.batch_size):
            soft_1d = tf.linspace(depth_start[i], depth_end[i],
                                  tf.cast(depth_num, tf.int32))
            soft_2d.append(soft_1d)
        soft_2d = tf.reshape(tf.stack(soft_2d, axis=0),
                             [volume_shape[0], volume_shape[1], 1, 1])
        soft_4d = tf.tile(soft_2d, [1, 1, volume_shape[2], volume_shape[3]])
        estimated_depth_map = tf.reduce_sum(soft_4d * probability_volume,
                                            axis=1)
        estimated_depth_map = tf.expand_dims(estimated_depth_map, axis=3)

    # probability map
    prob_map = get_propability_map(probability_volume, estimated_depth_map,
                                   depth_start, depth_interval)

    # return filtered_depth_map,
    return estimated_depth_map, prob_map
    def __init__(self, config, batch, word_mat=None, filter_sizes=None, embedding_size=None,num_filters=None,trainable=True, l2_reg_lambda=0.0, keep_prob=0.9, graph=None):
        """Build the text-CNN classification graph, its loss and its train op.

        Args:
            config: Settings object; this method reads `para_limit`,
                `ques_limit`, `test_para_limit`, `test_ques_limit`, `decay`,
                `init_lr` and `grad_clip` from it.
            batch: Iterator whose `get_next()` yields six tensors when
                `trainable` is true and four otherwise.
            word_mat: Initial value of the word-embedding matrix.
            filter_sizes: Iterable of convolution filter heights.
            embedding_size: Width of each convolution filter.
            num_filters: Number of filters per filter size.
            trainable: Selects the training or the test input layout/limits.
            l2_reg_lambda: Weight of the L2 penalty on the output layer.
            keep_prob: Keep probability passed to `tf.nn.dropout`.
            graph: Stored in `self.graph`; a new `tf.Graph()` is created
                when it is None.
        """

        # Placeholders for input, output and dropout
        self.config = config
        self.graph = graph if graph is not None else tf.Graph()
        self.trainable = trainable
        # NOTE(review): the test branch does not set self.input_y / self.qa_id,
        # yet self.input_y is read unconditionally in the loss and accuracy
        # sections below -- confirm trainable=False is actually supported.
        if trainable == True:
            self.input_x, self.input_x1, self.ch, self.qh, self.input_y, self.qa_id = batch.get_next()  # training batches carry six tensors, including labels (input_y) and qa_id
        else:
            self.input_x, self.input_x1, self.ch, self.qh = batch.get_next()  # test batches carry four tensors (no labels, no qa_id)
        self.dropout_keep_prob =keep_prob
        self.global_step = tf.get_variable('global_step', shape=[], dtype=tf.int32,
                                           initializer=tf.constant_initializer(0), trainable=False)
        # This placeholder (default 0.5) only feeds the trilinear attention
        # below; the dropout layer itself uses self.dropout_keep_prob.
        self.dropout = tf.placeholder_with_default(0.5, (), name="dropout")
        # Keeping track of l2 regularization loss (optional)
        l2_loss = tf.constant(0.0)
        # Embedding layer
        with tf.device('/cpu:0'), tf.name_scope("embedding"):
            self.W = tf.get_variable("word_mat", initializer=tf.constant(word_mat, dtype=tf.float32),
                                            trainable=True)
            self.c_mask = tf.cast(self.input_x, tf.bool)  # inputs are padded with 0 to a uniform length, so non-zero ids mark real tokens
            self.q_mask = tf.cast(self.input_x1, tf.bool)
            if trainable:
                self.c_maxlen, self.q_maxlen, = config.para_limit, config.ques_limit,
            else:
                self.c_maxlen, self.q_maxlen = config.test_para_limit, config.test_ques_limit

            self.embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x)
            self.embedded_chars1 = tf.nn.embedding_lookup(self.W, self.input_x1)
            # Add a trailing channel axis so the embeddings can feed conv2d.
            self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1)
            self.embedded_question = tf.expand_dims(self.embedded_chars1, -1)
        # NOTE(review): S is only printed below and not used again in this
        # method -- looks like leftover debugging; confirm.
        S = optimized_trilinear_for_attention([self.embedded_chars_expanded, self.embedded_question], self.c_maxlen, self.q_maxlen,
                                              input_keep_prob=1.0 - self.dropout)
        print(S,"2222222222222222222")
        # Create a convolution + maxpool layer for each filter size
        pooled_outputs = []
        for i, filter_size in enumerate(filter_sizes):
            with tf.name_scope("conv-maxpool-%s" % filter_size):
                # Convolution Layer
                filter_shape = [filter_size, embedding_size, 1, num_filters]
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
                b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
                conv = tf.nn.conv2d(
                    self.embedded_chars_expanded,
                    W,
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="conv")
                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                # Maxpooling over the outputs
                # NOTE(review): the pooling window always uses
                # config.para_limit, even when trainable is false and
                # c_maxlen comes from test_para_limit -- confirm.
                pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, config.para_limit - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool")
                pooled_outputs.append(pooled)

        # Combine all the pooled features
        num_filters_total = num_filters * len(filter_sizes)
        self.h_pool = tf.concat(pooled_outputs, 3)
        self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

        # Add dropout
        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)

        # Final (unnormalized) scores and predictions
        # The output layer maps the pooled features to 3 scores per example.
        with tf.name_scope("output"):
            W = tf.get_variable(
                "W",
                shape=[num_filters_total, 3],
                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.Variable(tf.constant(0.1, shape=[3]), name="b")
            l2_loss += tf.nn.l2_loss(W)
            l2_loss += tf.nn.l2_loss(b)
            self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        # Calculate mean cross-entropy loss
        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.input_y)
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

        # Accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
        # Optional exponential moving average of the trainable variables: the
        # loss is re-bound under a control dependency on the EMA update, and
        # assign_vars collects ops that copy each average back into its variable.
        if config.decay is not None:
            self.var_ema = tf.train.ExponentialMovingAverage(config.decay)
            ema_op = self.var_ema.apply(tf.trainable_variables())
            with tf.control_dependencies([ema_op]):
                self.loss = tf.identity(self.loss)

                self.assign_vars = []
                for var in tf.global_variables():
                    v = self.var_ema.average(var)
                    # Variables without a tracked average are skipped.
                    if v:
                        self.assign_vars.append(tf.assign(var, v))
        # Learning rate: logarithmic warm-up in the global step, capped at
        # config.init_lr (the uncapped curve reaches 0.001 at step 998).
        self.lr = tf.minimum(config.init_lr,
                             0.001 / tf.log(999.) * tf.log(tf.cast(self.global_step, tf.float32) + 1))
        self.opt = tf.train.AdamOptimizer(learning_rate=self.lr, beta1=0.8, beta2=0.999, epsilon=1e-7)
        # Clip gradients by global norm before applying them.
        grads = self.opt.compute_gradients(self.loss)
        gradients, variables = zip(*grads)
        capped_grads, _ = tf.clip_by_global_norm(
            gradients, config.grad_clip)
        self.train_op = self.opt.apply_gradients(
            zip(capped_grads, variables), global_step=self.global_step)
        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=3)
Beispiel #55
0
    [F,F,bias],
]
# Target outputs, one row per training input.
train_out =[
    [T],
    [F],
    [F],
    [F],
]
# Weights: one column of 3 values, randomly initialised.
W = tf.Variable(tf.random_normal([3,1]))
# tf.initialize_all_variables() is deprecated; tf.global_variables_initializer()
# is its direct replacement.
init = tf.global_variables_initializer()
sess= tf.Session()
sess.run(init)
# Creating a step function:
def step(x):
    """Return a tensor that is +1 where ``x`` is greater than 0 and -1 elsewhere."""
    # 1.0 where x > 0, 0.0 otherwise.
    positive_mask = tf.to_float(tf.greater(x,0))
    # Rescale {0, 1} to {-1, +1}.
    return tf.subtract(tf.multiply(positive_mask,2),1)
# Forward pass: linear response of the inputs pushed through the step function.
output = step((tf.matmul(train_in,W)))
# Difference between the targets and the predictions, and its mean square.
error = tf.subtract(train_out,output)
mse = tf.reduce_mean(tf.square(error))
# Weight reassignment: W <- W + train_in^T . error
delta = tf.matmul(train_in,error,transpose_a=True)
train = tf.assign(W,tf.add(W,delta))
err,  target =  1,0
epoch, max_epoch= 0 , 10
# Keep updating until the error reaches the target or max_epoch passes are done.
while err >target and epoch < max_epoch:
    epoch += 1
    # The error and the weight update are fetched in the same run call.
    err, _ = sess.run([mse,train])
    print(err, epoch)
Beispiel #56
0
def inference_winner_take_all(images,
                              cams,
                              depth_num,
                              depth_start,
                              depth_end,
                              is_master_gpu=True,
                              reg_type='GRU',
                              inverse_depth=False):
    """Infer a depth map by a winner-take-all sweep over depth hypotheses.

    A `tf.while_loop` visits the depth hypotheses one at a time. For each one
    it builds the cross-view variance cost, regularizes it with three stacked
    conv-GRU cells, and keeps, per pixel, the depth with the highest score.

    Args:
        images: Rank-5 tensor; axis 1 indexes the views (view 0 is the
            reference) and the first 3 channels of the last axis are used for
            the reference image.
        cams: Rank-5 tensor sliced per view with size [-1, 1, 2, 4, 4].
        depth_num: Number of depth hypotheses.
        depth_start: First depth value; reshaped to [batch_size, 1, 1, 1]
            after the depth computation.
        depth_end: Last depth value.
        is_master_gpu: When true the feature-network variables are created
            (`reuse=False`); otherwise existing variables are reused.
        reg_type: Not used by this function; kept for interface compatibility.
        inverse_depth: When true, hypotheses are sampled uniformly in inverse
            depth instead of depth.

    Returns:
        A pair `(depth_map, prob_map)`: the per-pixel winning depth, and the
        winning score divided by the sum of all scores (plus 1e-7).
    """

    # depth_interval is only needed (and only defined) for linear sampling.
    if not inverse_depth:
        depth_interval = (depth_end -
                          depth_start) / (tf.cast(depth_num, tf.float32) - 1)

    # reference image
    ref_image = tf.squeeze(tf.slice(images, [0, 0, 0, 0, 0],
                                    [-1, 1, -1, -1, 3]),
                           axis=1)
    ref_cam = tf.squeeze(tf.slice(cams, [0, 0, 0, 0, 0], [-1, 1, 2, 4, 4]),
                         axis=1)

    # image feature extraction
    # Only the master GPU creates the variables; every other tower reuses them.
    ref_tower = UNetDS2GN({'data': ref_image},
                          is_training=True,
                          reuse=not is_master_gpu)
    view_towers = []
    for view in range(1, FLAGS.view_num):
        view_image = tf.squeeze(tf.slice(images, [0, view, 0, 0, 0],
                                         [-1, 1, -1, -1, -1]),
                                axis=1)
        view_tower = UNetDS2GN({'data': view_image},
                               is_training=True,
                               reuse=True)
        view_towers.append(view_tower)

    # get all homographies
    view_homographies = []
    for view in range(1, FLAGS.view_num):
        view_cam = tf.squeeze(tf.slice(cams, [0, view, 0, 0, 0],
                                       [-1, 1, 2, 4, 4]),
                              axis=1)
        if inverse_depth:
            homographies = get_homographies_inv_depth(ref_cam,
                                                      view_cam,
                                                      depth_num=depth_num,
                                                      depth_start=depth_start,
                                                      depth_end=depth_end)
        else:
            homographies = get_homographies(ref_cam,
                                            view_cam,
                                            depth_num=depth_num,
                                            depth_start=depth_start,
                                            depth_interval=depth_interval)
        view_homographies.append(homographies)

    # gru unit
    gru1_filters = 16
    gru2_filters = 4
    gru3_filters = 2
    # Floor division keeps the spatial sizes ints; true division would give
    # floats under Python 3, which are not valid dimensions for tf.zeros,
    # set_shape or tf.tile.
    feature_shape = [FLAGS.batch_size, FLAGS.max_h // 4, FLAGS.max_w // 4, 32]
    gru_input_shape = [feature_shape[1], feature_shape[2]]
    state1 = tf.zeros(
        [FLAGS.batch_size, feature_shape[1], feature_shape[2], gru1_filters])
    state2 = tf.zeros(
        [FLAGS.batch_size, feature_shape[1], feature_shape[2], gru2_filters])
    state3 = tf.zeros(
        [FLAGS.batch_size, feature_shape[1], feature_shape[2], gru3_filters])
    conv_gru1 = ConvGRUCell(shape=gru_input_shape,
                            kernel=[3, 3],
                            filters=gru1_filters)
    conv_gru2 = ConvGRUCell(shape=gru_input_shape,
                            kernel=[3, 3],
                            filters=gru2_filters)
    conv_gru3 = ConvGRUCell(shape=gru_input_shape,
                            kernel=[3, 3],
                            filters=gru3_filters)

    # initialize variables
    # Local, non-trainable accumulators updated in place inside the loop.
    exp_sum = tf.Variable(tf.zeros(
        [FLAGS.batch_size, feature_shape[1], feature_shape[2], 1]),
                          name='exp_sum',
                          trainable=False,
                          collections=[tf.GraphKeys.LOCAL_VARIABLES])
    depth_image = tf.Variable(tf.zeros(
        [FLAGS.batch_size, feature_shape[1], feature_shape[2], 1]),
                              name='depth_image',
                              trainable=False,
                              collections=[tf.GraphKeys.LOCAL_VARIABLES])
    max_prob_image = tf.Variable(tf.zeros(
        [FLAGS.batch_size, feature_shape[1], feature_shape[2], 1]),
                                 name='max_prob_image',
                                 trainable=False,
                                 collections=[tf.GraphKeys.LOCAL_VARIABLES])
    init_map = tf.zeros(
        [FLAGS.batch_size, feature_shape[1], feature_shape[2], 1])

    # define winner take all loop
    def body(depth_index, state1, state2, state3, depth_image, max_prob_image,
             exp_sum, incre):
        """Process one depth hypothesis and update the running best depth."""

        # calculate cost
        # Sum and sum of squares over the reference feature and all warped
        # source-view features at this depth.
        ave_feature = ref_tower.get_output()
        ave_feature2 = tf.square(ref_tower.get_output())
        for view in range(0, FLAGS.view_num - 1):
            homographies = view_homographies[view]
            # Swap the first two axes so indexing by depth_index selects the
            # homographies of the current depth hypothesis.
            homographies = tf.transpose(homographies, perm=[1, 0, 2, 3])
            homography = homographies[depth_index]
            # warped_view_feature = homography_warping(view_towers[view].get_output(), homography)
            warped_view_feature = tf_transform_homography(
                view_towers[view].get_output(), homography)
            ave_feature = ave_feature + warped_view_feature
            ave_feature2 = ave_feature2 + tf.square(warped_view_feature)
        ave_feature = ave_feature / FLAGS.view_num
        ave_feature2 = ave_feature2 / FLAGS.view_num
        # Variance across views: mean of squares minus square of the mean.
        cost = ave_feature2 - tf.square(ave_feature)
        cost.set_shape(
            [FLAGS.batch_size, feature_shape[1], feature_shape[2], 32])

        # gru
        reg_cost1, state1 = conv_gru1(-cost, state1, scope='conv_gru1')
        reg_cost2, state2 = conv_gru2(reg_cost1, state2, scope='conv_gru2')
        reg_cost3, state3 = conv_gru3(reg_cost2, state3, scope='conv_gru3')
        reg_cost = tf.layers.conv2d(reg_cost3,
                                    1,
                                    3,
                                    padding='same',
                                    reuse=tf.AUTO_REUSE,
                                    name='prob_conv')
        # Unnormalized score for this depth; normalized after the loop.
        prob = tf.exp(reg_cost)

        # index
        d_idx = tf.cast(depth_index, tf.float32)
        if inverse_depth:
            inv_depth_start = tf.div(1.0, depth_start)
            inv_depth_end = tf.div(1.0, depth_end)
            inv_interval = (inv_depth_start - inv_depth_end) / (
                tf.cast(depth_num, 'float32') - 1)
            inv_depth = inv_depth_start - d_idx * inv_interval
            depth = tf.div(1.0, inv_depth)
        else:
            depth = depth_start + d_idx * depth_interval
        # Broadcast the per-sample depth value over the spatial dimensions.
        temp_depth_image = tf.reshape(depth, [FLAGS.batch_size, 1, 1, 1])
        temp_depth_image = tf.tile(temp_depth_image,
                                   [1, feature_shape[1], feature_shape[2], 1])

        # update the best
        # update_flag_image is 1.0 where this depth beats the best score so
        # far and 0.0 elsewhere; it blends new and previous values.
        update_flag_image = tf.cast(tf.less(max_prob_image, prob),
                                    dtype='float32')
        new_max_prob_image = update_flag_image * prob + (
            1 - update_flag_image) * max_prob_image
        new_depth_image = update_flag_image * temp_depth_image + (
            1 - update_flag_image) * depth_image
        max_prob_image = tf.assign(max_prob_image, new_max_prob_image)
        depth_image = tf.assign(depth_image, new_depth_image)

        # update counter
        exp_sum = tf.assign_add(exp_sum, prob)
        depth_index = tf.add(depth_index, incre)

        return depth_index, state1, state2, state3, depth_image, max_prob_image, exp_sum, incre

    # run forward loop
    # Reset the accumulators to zero before sweeping the depth hypotheses.
    exp_sum = tf.assign(exp_sum, init_map)
    depth_image = tf.assign(depth_image, init_map)
    max_prob_image = tf.assign(max_prob_image, init_map)
    depth_index = tf.constant(0)
    incre = tf.constant(1)
    cond = lambda depth_index, *_: tf.less(depth_index, depth_num)
    _, state1, state2, state3, depth_image, max_prob_image, exp_sum, incre = tf.while_loop(
        cond,
        body, [
            depth_index, state1, state2, state3, depth_image, max_prob_image,
            exp_sum, incre
        ],
        back_prop=False,
        parallel_iterations=1)

    # get output
    # The small epsilon avoids division by zero when normalizing the score.
    forward_exp_sum = exp_sum + 1e-7
    forward_depth_map = depth_image
    return forward_depth_map, max_prob_image / forward_exp_sum
Beispiel #57
0
def learn(
        env,
        policy_fn,
        *,
        timesteps_per_actorbatch,  # timesteps per actor per update
        clip_param,
        entcoeff,  # clipping parameter epsilon, entropy coeff
        optim_epochs,
        optim_stepsize,
        optim_batchsize,  # optimization hypers
        gamma,
        lam,  # advantage estimation
        max_timesteps=0,
        max_episodes=0,
        max_iters=0,
        max_seconds=0,  # time constraint
        callback=None,  # you can do anything in the callback, since it takes locals(), globals()
        adam_epsilon=1e-5,
        schedule='constant',  # annealing for stepsize parameters (epsilon and adam)
        gradients=True,
        hessians=False,
        model_path='model',
        output_prefix,
        sim):
    """Optimize a policy with a clipped-surrogate (PPO-style) objective.

    Builds the loss graph for a new/old policy pair and then loops: collect a
    trajectory segment, run ``optim_epochs`` passes of minibatch gradient
    steps, evaluate and log the losses, and checkpoint the model. The loop
    ends when the active time constraint is reached, when the mean minibatch
    gradient norm of the last epoch falls below 1e-4, or -- if ``sim`` is
    truthy -- right after the first iteration. In every case
    ``return_routine`` is called before returning.

    Args:
        env: Environment exposing ``observation_space`` and ``action_space``;
            also handed to ``traj_segment_generator``.
        policy_fn: Callable ``(name, ob_space, ac_space) -> policy`` used to
            build both the new ("pi") and the old ("oldpi") policy.
        timesteps_per_actorbatch: Forwarded to ``traj_segment_generator``.
        clip_param: Clipping parameter; scaled by the learning-rate
            multiplier inside the graph.
        entcoeff: Weight of the entropy bonus in the loss.
        optim_epochs: Number of passes over each collected segment.
        optim_stepsize: Step size given to ``adam.update`` (scaled by the
            current learning-rate multiplier).
        optim_batchsize: Minibatch size; a falsy value means "use the whole
            segment".
        gamma: Forwarded to ``add_vtarg_and_adv``.
        lam: Forwarded to ``add_vtarg_and_adv``.
        max_timesteps: Time constraint on collected timesteps.
        max_episodes: Time constraint on collected episodes.
        max_iters: Time constraint on loop iterations.
        max_seconds: Time constraint on wall-clock seconds. Exactly one of
            the four ``max_*`` values must be positive.
        callback: Optional callable invoked as
            ``callback(locals(), globals())`` at the top of every iteration.
        adam_epsilon: Epsilon passed to ``MpiAdam``.
        schedule: ``'constant'`` or ``'linear'`` (linear decay over
            ``max_timesteps``).
        gradients: Forwarded to ``return_routine``.
        hessians: Forwarded to ``return_routine``.
        model_path: File-name prefix of the checkpoints written under
            ``models/``.
        output_prefix: Forwarded to ``return_routine``.
        sim: When truthy, gradients are computed but never applied and the
            function returns after the first iteration.

    Returns:
        The new policy object ``pi``.

    Raises:
        AssertionError: If not exactly one time constraint is positive.
        ValueError: If ``schedule == 'linear'`` but ``max_timesteps`` is 0.
        NotImplementedError: If ``schedule`` is not a known schedule name.
    """

    # Directory setup. exist_ok avoids the check-then-create race that can
    # raise FileExistsError when several processes (e.g. MPI ranks) start
    # at the same time.
    model_dir = 'models/'
    os.makedirs(model_dir, exist_ok=True)
    # Setup losses and stuff
    # ----------------------------------------
    ob_space = env.observation_space
    ac_space = env.action_space
    pi = policy_fn("pi", ob_space,
                   ac_space)  # Construct network for new policy
    oldpi = policy_fn("oldpi", ob_space, ac_space)  # Network for old policy
    atarg = tf.placeholder(
        dtype=tf.float32,
        shape=[None])  # Target advantage function (if applicable)
    ret = tf.placeholder(dtype=tf.float32, shape=[None])  # Empirical return

    lrmult = tf.placeholder(
        name='lrmult', dtype=tf.float32,
        shape=[])  # learning rate multiplier, updated with schedule
    clip_param = clip_param * lrmult  # Annealed clipping parameter epsilon

    ob = U.get_placeholder_cached(name="ob")
    ac = pi.pdtype.sample_placeholder([None])

    kloldnew = oldpi.pd.kl(pi.pd)
    ent = pi.pd.entropy()
    meankl = tf.reduce_mean(kloldnew)
    meanent = tf.reduce_mean(ent)
    pol_entpen = (-entcoeff) * meanent

    ratio = tf.exp(pi.pd.logp(ac) - oldpi.pd.logp(ac))  # pnew / pold
    surr1 = ratio * atarg  # surrogate from conservative policy iteration
    surr2 = tf.clip_by_value(ratio, 1.0 - clip_param,
                             1.0 + clip_param) * atarg  # clipped surrogate
    pol_surr = -tf.reduce_mean(tf.minimum(
        surr1, surr2))  # PPO's pessimistic surrogate (L^CLIP)
    vf_loss = tf.reduce_mean(tf.square(pi.vpred - ret))
    total_loss = pol_surr + pol_entpen + vf_loss
    losses = [pol_surr, pol_entpen, vf_loss, meankl, meanent]
    loss_names = ["pol_surr", "pol_entpen", "vf_loss", "kl", "ent"]

    var_list = pi.get_trainable_variables()

    # Same outputs as lossandgrad plus the flattened Hessian; only handed to
    # return_routine, never called directly in this function.
    lossandgradandhessian = U.function(
        [ob, ac, atarg, ret, lrmult], losses +
        [U.flatgrad(total_loss, var_list),
         U.flathess(total_loss, var_list)])
    lossandgrad = U.function([ob, ac, atarg, ret, lrmult],
                             losses + [U.flatgrad(total_loss, var_list)])
    adam = MpiAdam(var_list, epsilon=adam_epsilon)

    assign_old_eq_new = U.function(
        [], [],
        updates=[
            tf.assign(oldv, newv)
            for (oldv,
                 newv) in zipsame(oldpi.get_variables(), pi.get_variables())
        ])
    compute_losses = U.function([ob, ac, atarg, ret, lrmult], losses)

    U.initialize()
    # Write the graph definition for TensorBoard.
    # NOTE(review): the target directory is hard-coded to one developer's
    # home directory -- consider making it configurable.
    tf.summary.FileWriter(
        '/home/aespielberg/ResearchCode/baselines/baselines/tmp/',
        graph_def=tf.get_default_session().graph_def)
    adam.sync()

    # Prepare for rollouts
    # ----------------------------------------
    seg_gen = traj_segment_generator(pi,
                                     env,
                                     timesteps_per_actorbatch,
                                     stochastic=True)

    episodes_so_far = 0
    timesteps_so_far = 0
    iters_so_far = 0
    tstart = time.time()
    lenbuffer = deque(maxlen=100)  # rolling buffer for episode lengths
    rewbuffer = deque(maxlen=100)  # rolling buffer for episode rewards

    assert sum(
        [max_iters > 0, max_timesteps > 0, max_episodes > 0,
         max_seconds > 0]) == 1, "Only one time constraint permitted"

    # The linear schedule divides by max_timesteps; fail with a clear message
    # instead of a ZeroDivisionError in the middle of the first iteration.
    if schedule == 'linear' and not max_timesteps:
        raise ValueError(
            "schedule='linear' requires max_timesteps to be non-zero")

    # Not used below, but visible to `callback` through locals().
    gradient_indices = get_gradient_indices(pi)

    while True:
        if callback:
            callback(locals(), globals())

        # TODO: add a convergence-based break condition, e.g. stop once
        # np.std(rewbuffer) / np.mean(rewbuffer) drops below a threshold
        # supplied as an input argument.

        if max_timesteps and timesteps_so_far >= max_timesteps:
            break
        elif max_episodes and episodes_so_far >= max_episodes:
            break
        elif max_iters and iters_so_far >= max_iters:
            break
        elif max_seconds and time.time() - tstart >= max_seconds:
            break

        if schedule == 'constant':
            cur_lrmult = 1.0
        elif schedule == 'linear':
            cur_lrmult = max(1.0 - float(timesteps_so_far) / max_timesteps, 0)
        else:
            raise NotImplementedError

        logger.log("********** Iteration %i ************" % iters_so_far)

        seg = next(seg_gen)
        add_vtarg_and_adv(seg, gamma, lam)

        # ob, ac, atarg, ret, td1ret = map(np.concatenate, (obs, acs, atargs, rets, td1rets))
        ob, ac, atarg, tdlamret = seg["ob"], seg["ac"], seg["adv"], seg[
            "tdlamret"]
        vpredbefore = seg["vpred"]  # predicted value function before update
        atarg = (atarg - atarg.mean()
                 ) / atarg.std()  # standardized advantage function estimate
        d = Dataset(dict(ob=ob, ac=ac, atarg=atarg, vtarg=tdlamret),
                    shuffle=not pi.recurrent)
        optim_batchsize = optim_batchsize or ob.shape[0]

        if hasattr(pi, "ob_rms"):
            pi.ob_rms.update(ob)  # update running mean/std for policy

        assign_old_eq_new()  # set old parameter values to new parameter values
        logger.log("Optimizing...")
        logger.log(fmt_row(13, loss_names))
        # Here we do a bunch of optimization epochs over the data
        for _ in range(optim_epochs):
            # Both lists are reset per epoch, so after the loop they hold the
            # values of the last epoch only.
            gradient_set = []
            losses = [
            ]  # list of tuples, each of which gives the loss for a minibatch
            for batch in d.iterate_once(optim_batchsize):
                *newlosses, g = lossandgrad(batch["ob"], batch["ac"],
                                            batch["atarg"], batch["vtarg"],
                                            cur_lrmult)
                gradient_set.append(g)
                if not sim:
                    adam.update(g, optim_stepsize * cur_lrmult)
                losses.append(newlosses)
            logger.log(fmt_row(13, np.mean(losses, axis=0)))
        print('objective is')
        # Sum of the first three mean losses: pol_surr + pol_entpen + vf_loss.
        print(np.sum(np.mean(losses, axis=0)[0:3]))
        print(get_model_vars(pi))
        if sim:
            print('return routine')
            return_routine(pi, d, batch, output_prefix, losses, cur_lrmult,
                           lossandgradandhessian, gradients, hessians,
                           gradient_set)
            return pi
        # Mean L2 norm of the minibatch gradients of the last epoch.
        mean_grad_norm = np.mean(list(map(np.linalg.norm, gradient_set)))
        if mean_grad_norm < 1e-4:  # TODO: make this a variable
            # TODO: abstract all this away somehow (scope)
            print('minimized!')
            return_routine(pi, d, batch, output_prefix, losses, cur_lrmult,
                           lossandgradandhessian, gradients, hessians,
                           gradient_set)
            return pi
        print(mean_grad_norm)
        logger.log("Evaluating losses...")
        losses = []
        for batch in d.iterate_once(optim_batchsize):
            newlosses = compute_losses(batch["ob"], batch["ac"],
                                       batch["atarg"], batch["vtarg"],
                                       cur_lrmult)
            losses.append(newlosses)
        meanlosses, _, _ = mpi_moments(losses, axis=0)
        logger.log(fmt_row(13, meanlosses))
        for (lossval, name) in zipsame(meanlosses, loss_names):
            logger.record_tabular("loss_" + name, lossval)
        logger.record_tabular("ev_tdlam_before",
                              explained_variance(vpredbefore, tdlamret))
        lrlocal = (seg["ep_lens"], seg["ep_rets"])  # local values
        listoflrpairs = MPI.COMM_WORLD.allgather(lrlocal)  # list of tuples
        lens, rews = map(flatten_lists, zip(*listoflrpairs))
        lenbuffer.extend(lens)
        rewbuffer.extend(rews)
        logger.record_tabular("EpLenMean", np.mean(lenbuffer))
        logger.record_tabular("EpRewMean", np.mean(rewbuffer))
        logger.record_tabular("EpThisIter", len(lens))
        episodes_so_far += len(lens)
        timesteps_so_far += sum(lens)
        iters_so_far += 1
        logger.record_tabular("EpisodesSoFar", episodes_so_far)
        logger.record_tabular("TimestepsSoFar", timesteps_so_far)
        logger.record_tabular("TimeElapsed", time.time() - tstart)
        if MPI.COMM_WORLD.Get_rank() == 0:
            logger.dump_tabular()
        # Checkpoint from the second completed iteration onwards.
        if iters_so_far > 1:
            U.save_state(model_dir + model_path + str(iters_so_far))

    # Reached only through one of the time-constraint breaks above.
    print('out of time')
    return_routine(pi, d, batch, output_prefix, losses, cur_lrmult,
                   lossandgradandhessian, gradients, hessians, gradient_set)
    return pi
Beispiel #58
0
def create_model(inputs1, inputs2, targets):
    """Build the generator/discriminator graph together with its train ops.

    The generator is created from ``inputs1`` and ``inputs2`` with as many
    output channels as ``targets`` has in its last dimension. One
    discriminator (variables shared under the "discriminator" scope) scores
    both ``(inputs1, targets)`` and ``(inputs1, outputs)``.

    Returns:
        A ``Model`` carrying the two discriminator predictions, the
        moving-average losses, the gradient/variable pairs of both
        optimizers, the generator outputs and a grouped ``train`` op.
    """
    def create_discriminator(discrim_inputs, discrim_targets):
        """Return the sigmoid output of the convolutional discriminator."""
        n_layers = 3

        # The two tensors are joined along axis 3 before the first conv.
        stacked = tf.concat([discrim_inputs, discrim_targets], 3)

        with tf.variable_scope("layer_1"):
            features = lrelu(conv(stacked, 3, a.ndf, 2), 0.2)

        for depth in range(n_layers):
            # Scope numbering continues after layer_1: layer_2, layer_3, ...
            with tf.variable_scope("layer_%d" % (depth + 2)):
                width = a.ndf * min(2**(depth + 1), 8)
                # Only the last of these layers uses stride 1.
                step = 2 if depth != n_layers - 1 else 1
                features = lrelu(conv(features, 3, width, stride=step), 0.2)

        with tf.variable_scope("layer_%d" % (n_layers + 2)):
            return tf.sigmoid(conv(features, 3, 1, 1))

    def trainable_under(prefix):
        """Trainable variables whose name starts with ``prefix``."""
        return [
            candidate for candidate in tf.trainable_variables()
            if candidate.name.startswith(prefix)
        ]

    with tf.variable_scope("generator"):
        n_target_channels = int(targets.get_shape()[-1])
        outputs = create_generator(inputs1, inputs2, n_target_channels)

    with tf.name_scope("real_discriminator"), \
            tf.variable_scope("discriminator"):
        predict_real = create_discriminator(inputs1, targets)

    # reuse=True makes the second copy share the variables created above.
    with tf.name_scope("fake_discriminator"), \
            tf.variable_scope("discriminator", reuse=True):
        predict_fake = create_discriminator(inputs1, outputs)

    with tf.name_scope("discriminator_loss"):
        log_real = tf.log(predict_real + EPS)
        log_not_fake = tf.log(1 - predict_fake + EPS)
        discrim_loss = tf.reduce_mean(-(log_real + log_not_fake))

    with tf.name_scope("generator_loss"):
        gen_loss_GAN = tf.reduce_mean(-tf.log(predict_fake + EPS))
        gen_loss_L1 = tf.reduce_mean(tf.abs(targets - outputs))
        gen_loss = gen_loss_GAN * a.gan_weight + gen_loss_L1 * a.l1_weight

    with tf.name_scope("discriminator_train"):
        discrim_optim = tf.train.AdamOptimizer(a.lr, a.beta1)
        discrim_grads_and_vars = discrim_optim.compute_gradients(
            discrim_loss, var_list=trainable_under("discriminator"))
        discrim_train = discrim_optim.apply_gradients(discrim_grads_and_vars)

    with tf.name_scope("generator_train"):
        # Ops created here run only after the discriminator step.
        with tf.control_dependencies([discrim_train]):
            gen_optim = tf.train.AdamOptimizer(a.lr, a.beta1)
            gen_grads_and_vars = gen_optim.compute_gradients(
                gen_loss, var_list=trainable_under("generator"))
            gen_train = gen_optim.apply_gradients(gen_grads_and_vars)

    # The reported losses are exponential moving averages of the raw values.
    ema = tf.train.ExponentialMovingAverage(decay=0.99)
    update_losses = ema.apply([discrim_loss, gen_loss_GAN, gen_loss_L1])

    global_step = tf.contrib.framework.get_or_create_global_step()
    incr_global_step = tf.assign(global_step, global_step + 1)

    smoothed_discrim_loss = ema.average(discrim_loss)
    smoothed_gen_loss_GAN = ema.average(gen_loss_GAN)
    smoothed_gen_loss_L1 = ema.average(gen_loss_L1)
    train_op = tf.group(update_losses, incr_global_step, gen_train)

    return Model(
        predict_real=predict_real,
        predict_fake=predict_fake,
        discrim_loss=smoothed_discrim_loss,
        discrim_grads_and_vars=discrim_grads_and_vars,
        gen_loss_GAN=smoothed_gen_loss_GAN,
        gen_loss_L1=smoothed_gen_loss_L1,
        gen_grads_and_vars=gen_grads_and_vars,
        outputs=outputs,
        train=train_op,
    )
Beispiel #59
0
# Script: define network hyper-parameters and placeholders, then log an
# exponentially weighted running average of random data to TensorBoard.
dataset_full_path = dataset_path + ".txt"
logs_dir = "./logs/"
##################
n_inputs = 8
n_outputs = 1
hidden_layers_nodes = [20, 20]
dropout_rate = [0.1, 0.1]

learning_rate = 0.01

batch_size = 1
iterations = 200
##################
# `training` is a graph variable that is overwritten with the value fed to
# `mode` whenever `training_mode_op` is run.
training = tf.Variable(True)
mode = tf.placeholder(tf.bool)
training_mode_op = tf.assign(training, mode)

X = tf.placeholder(tf.float32, [None, n_inputs])
Y = tf.placeholder(tf.float32, [None, n_outputs])

sum_variables = []  # sum of all variables (each weight)
cua_variables = []  # sum of all variables multiplied by themselves (quadratic sum)
p_variables = []  # % of variation of each variable
weight_cp = []  # copy of last weights to view the % of variation
denominador = 1

sum_var_str = [{}, {}, {}]
iteration_save = 0
s = []

# 100 samples drawn from a normal distribution with mean 10 and std 1.
raw_data = np.random.normal(10, 1, 100)
alpha = tf.constant(0.05)
beta = tf.Variable(1.0 - alpha)
current_value = tf.placeholder(tf.float32)
previous_average = tf.Variable(0.0)
# Running average: alpha * new value + (1 - alpha) * previous average.
update_average = alpha * current_value + tf.multiply(beta, previous_average)

# Build the write-back op once, up front. Calling tf.assign() inside the
# loop would add a new node to the graph on every iteration.
new_average = tf.placeholder(tf.float32)
store_average = tf.assign(previous_average, new_average)

average_history = tf.summary.scalar('running average', update_average)
value_history = tf.summary.scalar('incoming values', current_value)
merged = tf.summary.merge_all()
log_folder = './logs/'
writer = tf.summary.FileWriter(log_folder)

initializer = tf.global_variables_initializer()

try:
    with tf.Session() as session:
        session.run(initializer)
        writer.add_graph(session.graph)
        for index, value in enumerate(raw_data):
            feed_dict = {current_value: value}
            summary_string, current_average = session.run([merged, update_average], feed_dict=feed_dict)
            # Persist the new average so the next step starts from it.
            session.run(store_average, feed_dict={new_average: current_average})
            logger.debug('raw data: %.2f current average: %.2f' % (value, current_average))
            writer.add_summary(summary_string, index)
finally:
    # Flush buffered events to disk and release the event file.
    writer.close()

logger.debug('done')
finish_time = time.time()
# Split the elapsed seconds into hours, minutes and seconds for the report.
elapsed_hours, elapsed_remainder = divmod(finish_time - start_time, 3600)
elapsed_minutes, elapsed_seconds = divmod(elapsed_remainder, 60)
logger.info("Time: {:0>2}:{:0>2}:{:05.2f}".format(int(elapsed_hours), int(elapsed_minutes), elapsed_seconds))