def main(args):
    # We init as h = x
    W = tf.Variable([1], dtype=tf.float32)
    b = tf.Variable([0], dtype=tf.float32)
    x = tf.placeholder(tf.float32)
    h = W * x + b
    init = tf.global_variables_initializer()
    sess = tf.Session()
    # sess.run(init)
    # print("hypothesis init:", sess.run(h, {x: [1, 2, 3, 4]}))

    y = tf.placeholder(tf.float32)
    squared_deltas = tf.square(h - y)
    cost = 0.5 * tf.reduce_mean(squared_deltas)
    # print("cost init:", sess.run(cost, {x: [1, 2, 3, 4], y: [0, -1, -2, -3]}))

    fixW = tf.assign(W, [-1.])
    fixb = tf.assign(b, [1.])
    sess.run([fixW, fixb])
    # print("W, b, cost expected:", sess.run([fixW, fixb, cost], {x: [1, 2, 3, 4], y: [0, -1, -2, -3]}))

    # linear regression
    sess.run(init)  # re-initialize W and b before training
    optimizer = tf.train.GradientDescentOptimizer(0.01)
    train = optimizer.minimize(cost)
    for i in range(10000):
        sess.run(train, {x: [1, 2, 3, 4, -3, 35], y: [0, -1, -2, -3, 4, -34]})
    curr_W, curr_b, curr_loss = sess.run(
        [W, b, cost], {x: [1, 2, 3, 4, -3, 35], y: [0, -1, -2, -3, 4, -34]})
    print("W, b, cost learned: ", curr_W, curr_b, curr_loss)
def _build_network(self):
    self._initPlaceholders()

    # This variable stores the number of states seen so far.
    self.count_states = tf.Variable(initial_value=0, trainable=False,
                                    dtype=tf.int64, name='count_states')
    # Similarly, this is the counter for the number of episodes.
    self.count_episodes = tf.Variable(initial_value=0, trainable=False,
                                      dtype=tf.int64, name='count_episodes')

    # TensorFlow operation for increasing count_states.
    self.count_states_increase = tf.assign(self.count_states,
                                           self.count_states + 1)
    # TensorFlow operation for increasing count_episodes.
    self.count_episodes_increase = tf.assign(self.count_episodes,
                                             self.count_episodes + 1)

    self.q_values = create_conv_model(self.states, self.config, self.num_actions)
    error = tf.losses.mean_squared_error(self.q_values_target * self.actions,
                                         self.q_values * self.actions)
    self.loss = error
    self.learning_rate_op = tf.maximum(
        self.learning_rate_minimum,
        tf.train.exponential_decay(self.learning_rate,
                                   self.learning_rate_step,
                                   self.learning_rate_decay_step,
                                   self.learning_rate_decay,
                                   staircase=True))
    self.optim = tf.train.RMSPropOptimizer(self.learning_rate_op,
                                           momentum=0.95,
                                           epsilon=0.01).minimize(self.loss)
    self.saver = tf.train.Saver()
    self.sess = tf.Session()
    self.load_checkpoint()
    self.summary = tf.summary.merge_all()
def batch_norm(x, name_scope, training, epsilon=1e-3, decay=0.999):
    """Assume 2d [batch, values] tensor"""
    with tf.variable_scope(name_scope):
        size = x.get_shape().as_list()[1]

        scale = tf.get_variable('scale', [size],
                                initializer=tf.constant_initializer(0.1))
        offset = tf.get_variable('offset', [size])

        pop_mean = tf.get_variable('pop_mean', [size],
                                   initializer=tf.zeros_initializer(),
                                   trainable=False)
        pop_var = tf.get_variable('pop_var', [size],
                                  initializer=tf.ones_initializer(),
                                  trainable=False)
        batch_mean, batch_var = tf.nn.moments(x, [0])

        train_mean_op = tf.assign(pop_mean,
                                  pop_mean * decay + batch_mean * (1 - decay))
        train_var_op = tf.assign(pop_var,
                                 pop_var * decay + batch_var * (1 - decay))

        def batch_statistics():
            with tf.control_dependencies([train_mean_op, train_var_op]):
                return tf.nn.batch_normalization(x, batch_mean, batch_var,
                                                 offset, scale, epsilon)

        def population_statistics():
            return tf.nn.batch_normalization(x, pop_mean, pop_var,
                                             offset, scale, epsilon)

        return tf.cond(training, batch_statistics, population_statistics)
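# Usage sketch for the batch_norm above (not from the source; x_in, is_train,
# and normed are illustrative names). `training` must be a scalar boolean
# tensor because the function branches with tf.cond.
import numpy as np
import tensorflow as tf

x_in = tf.placeholder(tf.float32, [None, 64])
is_train = tf.placeholder(tf.bool, [])
normed = batch_norm(x_in, 'bn1', is_train)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # is_train=True updates pop_mean/pop_var as a side effect of evaluating
    # `normed`; is_train=False reads the stored population statistics.
    out = sess.run(normed, {x_in: np.ones((8, 64), np.float32), is_train: True})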
def test_capture(self):
    global_step = tf.contrib.framework.get_or_create_global_step()
    # Some test computation
    some_weights = tf.get_variable("weights", [2, 128])
    computation = tf.nn.softmax(some_weights)

    hook = hooks.MetadataCaptureHook(
        params={"step": 5},
        model_dir=self.model_dir,
        run_config=tf.contrib.learn.RunConfig())
    hook.begin()

    with self.test_session() as sess:
        sess.run(tf.global_variables_initializer())
        # pylint: disable=W0212
        mon_sess = monitored_session._HookedSession(sess, [hook])
        # Should not trigger for step 0
        sess.run(tf.assign(global_step, 0))
        mon_sess.run(computation)
        self.assertEqual(gfile.ListDirectory(self.model_dir), [])
        # Should trigger *after* step 5
        sess.run(tf.assign(global_step, 5))
        mon_sess.run(computation)
        self.assertEqual(gfile.ListDirectory(self.model_dir), [])
        mon_sess.run(computation)
        self.assertEqual(
            set(gfile.ListDirectory(self.model_dir)),
            set(["run_meta", "tfprof_log", "timeline.json"]))
def fit(self, xs, ys):
    if self.normalize_inputs:
        # recompute normalizing constants for inputs
        new_mean = np.mean(xs, axis=0, keepdims=True)
        new_std = np.std(xs, axis=0, keepdims=True) + 1e-8
        tf.get_default_session().run(tf.group(
            tf.assign(self.x_mean_var, new_mean),
            tf.assign(self.x_std_var, new_std),
        ))
    if self.use_trust_region and self.first_optimized:
        old_prob = self.f_prob(xs)
        inputs = [xs, ys, old_prob]
        optimizer = self.tr_optimizer
    else:
        inputs = [xs, ys]
        optimizer = self.optimizer
    loss_before = optimizer.loss(inputs)
    if self.name:
        prefix = self.name + "_"
    else:
        prefix = ""
    logger.record_tabular(prefix + 'LossBefore', loss_before)
    optimizer.optimize(inputs)
    loss_after = optimizer.loss(inputs)
    logger.record_tabular(prefix + 'LossAfter', loss_after)
    logger.record_tabular(prefix + 'dLoss', loss_before - loss_after)
    self.first_optimized = True
def testReuseVars(self):
    height, width = 3, 3
    with self.test_session() as sess:
        image_shape = (10, height, width, 3)
        image_values = np.random.rand(*image_shape)
        expected_mean = np.mean(image_values, axis=(0, 1, 2))
        expected_var = np.var(image_values, axis=(0, 1, 2))
        images = tf.constant(image_values, shape=image_shape, dtype=tf.float32)
        output = ops.batch_norm(images, decay=0.1, is_training=False)
        update_ops = tf.get_collection(ops.UPDATE_OPS_COLLECTION)
        with tf.control_dependencies(update_ops):
            barrier = tf.no_op(name='gradient_barrier')
            output = control_flow_ops.with_dependencies([barrier], output)
        # Initialize all variables
        sess.run(tf.global_variables_initializer())
        moving_mean = variables.get_variables('BatchNorm/moving_mean')[0]
        moving_variance = variables.get_variables('BatchNorm/moving_variance')[0]
        mean, variance = sess.run([moving_mean, moving_variance])
        # After initialization moving_mean == 0 and moving_variance == 1.
        self.assertAllClose(mean, [0] * 3)
        self.assertAllClose(variance, [1] * 3)
        # Simulate assignment from saver restore.
        init_assigns = [tf.assign(moving_mean, expected_mean),
                        tf.assign(moving_variance, expected_var)]
        sess.run(init_assigns)
        for _ in range(10):
            sess.run([output], {images: np.random.rand(*image_shape)})
        mean = moving_mean.eval()
        variance = moving_variance.eval()
        # Although we feed different images, the moving_mean and moving_variance
        # shouldn't change.
        self.assertAllClose(mean, expected_mean)
        self.assertAllClose(variance, expected_var)
def fit_em(X, initial_mus, max_steps, tol, min_covar=MIN_COVAR_DEFAULT):
    tf.reset_default_graph()
    N, D = X.shape
    K, Dmu = initial_mus.shape
    assert D == Dmu
    mus0 = initial_mus
    sigmas0 = np.tile(np.var(X, axis=0), (K, 1))
    alphas0 = np.ones(K) / K
    X = tf.constant(X)
    mus, sigmas, alphas = (tf.Variable(x, dtype='float64')
                           for x in [mus0, sigmas0, alphas0])
    all_ll, resp = estep(X, mus, sigmas, alphas)
    cmus, csigmas, calphas = mstep(X, resp, min_covar=min_covar)
    update_mus_step = tf.assign(mus, cmus)
    update_sigmas_step = tf.assign(sigmas, csigmas)
    update_alphas_step = tf.assign(alphas, calphas)
    init_op = tf.initialize_all_variables()
    ll = prev_ll = -np.inf
    with tf.Session() as sess:
        sess.run(init_op)
        for i in range(max_steps):
            ll = sess.run(tf.reduce_mean(all_ll))
            sess.run((update_mus_step, update_sigmas_step, update_alphas_step))
            # print('EM iteration', i, 'log likelihood', ll)
            if abs(ll - prev_ll) < tol:
                break
            prev_ll = ll
        m, s, a = sess.run((mus, sigmas, alphas))
    return ll, m, s, a
def _apply(self, grad, var, indices=None):
    lr = tf.cast(self._learning_rate_tensor, var.dtype.base_dtype)
    m = self.get_slot(var, "m")
    v = self.get_slot(var, "v")
    beta1_t = tf.cast(self._beta1_t, var.dtype.base_dtype)
    beta2_t = tf.cast(self._beta2_t, var.dtype.base_dtype)
    epsilon_t = tf.cast(self._epsilon_t, var.dtype.base_dtype)

    # m_t = beta1 * m + (1 - beta1) * g_t
    m_scaled_g_values = grad * (1 - beta1_t)
    m_t = tf.assign(m, m * beta1_t, use_locking=self._use_locking)
    with tf.control_dependencies([m_t]):
        m_t = self._assign_add(m, updates=m_scaled_g_values, indices=indices)
    m_gathered = self._gather(m_t, indices=indices)

    # Also see tf.nn.moments.
    variance = tf.squared_difference(grad, m_gathered)

    # v_t = beta2 * v + (1 - beta2) * variance
    v_scaled_new_values = variance * (1 - beta2_t)
    v_t = tf.assign(v, v * beta2_t, use_locking=self._use_locking)
    with tf.control_dependencies([v_t]):
        v_t = self._assign_add(v, updates=v_scaled_new_values, indices=indices)
    v_gathered = self._gather(v_t, indices=indices)

    factor = v_gathered / (variance + epsilon_t)
    update = lr * grad * tf.minimum(factor, 1.0)
    var_update = self._assign_sub(ref=var, updates=update, indices=indices)
    return tf.group(*[var_update, m_t])
def __call__(self, x):
    if x.get_shape().ndims == 2:
        mean, var = tf.nn.moments(x, axes=(0,), keep_dims=True)
        normalized_x = (x - mean) / tf.sqrt(var + self.epsilon)
        moment_op = tf.group(tf.assign(self.mean, mean),
                             tf.assign(self.var, var))
        moment_op = tf.cond(self.train[0],
                            lambda: moment_op,
                            lambda: tf.no_op())
        return self.gamma * normalized_x + self.beta, moment_op
def batch_norm(inputs, name_scope, is_training, epsilon=1e-3, decay=0.99):
    with tf.variable_scope(name_scope):
        size = inputs.get_shape().as_list()[1]

        gamma = tf.get_variable(
            'gamma', [size], initializer=tf.constant_initializer(0.1))
        # beta = tf.get_variable('beta', [size], initializer=tf.constant_initializer(0))
        beta = tf.get_variable('beta', [size])

        pop_mean = tf.get_variable('pop_mean', [size],
                                   initializer=tf.zeros_initializer(),
                                   trainable=False)
        pop_var = tf.get_variable('pop_var', [size],
                                  initializer=tf.ones_initializer(),
                                  trainable=False)
        batch_mean, batch_var = tf.nn.moments(inputs, [0])

        train_mean_op = tf.assign(
            pop_mean, pop_mean * decay + batch_mean * (1 - decay))
        train_var_op = tf.assign(
            pop_var, pop_var * decay + batch_var * (1 - decay))

        def batch_statistics():
            with tf.control_dependencies([train_mean_op, train_var_op]):
                return tf.nn.batch_normalization(inputs, batch_mean, batch_var,
                                                 beta, gamma, epsilon)

        def pop_statistics():
            return tf.nn.batch_normalization(inputs, pop_mean, pop_var,
                                             beta, gamma, epsilon)

        # control flow
        return tf.cond(is_training, batch_statistics, pop_statistics)
def testIsVariableInitialized(self):
    for use_gpu in [True, False]:
        with self.test_session(use_gpu=use_gpu):
            v0 = state_ops.variable_op([1, 2], tf.float32)
            self.assertEqual(False, tf.is_variable_initialized(v0).eval())
            tf.assign(v0, [[2.0, 3.0]]).eval()
            self.assertEqual(True, tf.is_variable_initialized(v0).eval())
def mean_var_with_update():
    ema_apply_op = ema.apply([batch_mean, batch_var])
    pop_mean_op = tf.assign(pop_mean, ema.average(batch_mean))
    pop_var_op = tf.assign(pop_var, ema.average(batch_var))
    with tf.control_dependencies([ema_apply_op, pop_mean_op, pop_var_op]):
        return tf.identity(batch_mean), tf.identity(batch_var)
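# Hedged sketch of the enclosing context this closure assumes: ema,
# batch_mean/batch_var, and pop_mean/pop_var normally come from an outer
# scope, and a tf.cond picks between batch and population statistics.
# Everything below is an assumed reconstruction, not the source.
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 32])
is_training = tf.placeholder(tf.bool, [])
ema = tf.train.ExponentialMovingAverage(decay=0.99)
batch_mean, batch_var = tf.nn.moments(x, axes=[0])
pop_mean = tf.Variable(tf.zeros([32]), trainable=False)
pop_var = tf.Variable(tf.ones([32]), trainable=False)

# Training: recompute the moments and update the averages as a side effect.
# Testing: read the stored population statistics instead.
mean, var = tf.cond(is_training,
                    mean_var_with_update,
                    lambda: (pop_mean, pop_var))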
def batchnorm(x, gamma, beta, r_mean, r_var):
    mean, var = tf.nn.moments(x, [0])
    update_mean = tf.assign(r_mean, 0.9 * r_mean + 0.1 * mean)
    update_var = tf.assign(r_var, 0.9 * r_var + 0.1 * var)
    with tf.control_dependencies([update_mean, update_var]):
        return tf.nn.batch_normalization(x,
                                         tf.clip_by_value(r_mean, 1e-10, 100),
                                         tf.clip_by_value(r_var, 1e-10, 100),
                                         offset=beta, scale=gamma,
                                         variance_epsilon=1e-5)
def expectation_maximization_step(self, x):
    # probability of emission sequence
    obs_prob_seq = tf.gather(self.E, x)

    with tf.name_scope('Forward_Backward'):
        self.forward_backward(obs_prob_seq)

    with tf.name_scope('Re_estimate_transition'):
        new_T0, new_transition = self.re_estimate_transition(x)

    with tf.name_scope('Re_estimate_emission'):
        new_emission = self.re_estimate_emission(x)

    with tf.name_scope('Check_Convergence'):
        converged = self.check_convergence(new_T0, new_transition, new_emission)

    with tf.name_scope('Update_parameters'):
        self.T0 = tf.assign(self.T0, new_T0)
        self.E = tf.assign(self.E, new_emission)
        self.T = tf.assign(self.T, new_transition)
        # self.count = tf.assign_add(self.count, 1)

    with tf.name_scope('histogram_summary'):
        _ = tf.histogram_summary(self.T0.name, self.T0)
        _ = tf.histogram_summary(self.T.name, self.T)
        _ = tf.histogram_summary(self.E.name, self.E)

    return converged
def __init__(self, gan=None, config=None, trainer=None, name="ProgressCompressTrainHook"):
    super().__init__(config=config, gan=gan, trainer=trainer, name=name)
    d_loss = []
    self.x = tf.Variable(tf.zeros_like(gan.inputs.x))
    self.g = tf.Variable(tf.zeros_like(gan.generator.sample))
    stacked = tf.concat([self.gan.inputs.x, self.gan.generator.sample], axis=0)
    self.assign_x = tf.assign(self.x, gan.inputs.x)
    self.assign_g = tf.assign(self.g, gan.generator.sample)
    self.re_init_d = [d.initializer for d in gan.discriminator.variables()]
    gan.hack = self.g
    self.assign_knowledge_base = []

    bs = gan.batch_size()
    real = gan.discriminator.named_layers['knowledge_base_target']
    # real = tf.reshape(gan.loss.sample[:2], [2, -1])
    _inputs = hc.Config({'x': real})
    inner_gan = KBGAN(config=self.config.knowledge_base, inputs=_inputs,
                      x=real, latent=stacked)
    self.kb_loss = inner_gan.loss
    self.kb = inner_gan.generator
    self.trainer = inner_gan.trainer
    variables = inner_gan.variables()
    # variables += self.kb.variables()

    for c in gan.components:
        if hasattr(c, 'knowledge_base'):
            for name, net in c.knowledge_base:
                assign = self.kb.named_layers[name]
                if self.ops.shape(assign)[0] > self.ops.shape(net)[0]:
                    assign = tf.slice(assign,
                                      [0 for i in self.ops.shape(net)],
                                      [self.ops.shape(net)[0]] + self.ops.shape(assign)[1:])
                self.assign_knowledge_base.append(tf.assign(net, assign))

    self.gan.add_metric('d_kb', self.kb_loss.sample[0])
    self.gan.add_metric('g_kb', self.kb_loss.sample[1])
def train_spectrogram_encoder():
    tf.initialize_all_variables().run()
    print("Pretrain")
    for i in range(6000 - 1):
        batch_xs, batch_ys = speech.train.next_batch(100)
        # WTF, tensorflow can't do 3D tensor operations?
        # https://github.com/tensorflow/tensorflow/issues/406 =>
        batch_xs = [flatten(matrix) for matrix in batch_xs]
        # you have to reshape to flat/matrix data? why didn't they call it matrixflow?
        feed = {x: batch_xs, y_: batch_ys}
        speech_step.run(feed)  # better for encod_entropy too! (later)
        if i % 100 == 0:
            print("iteration %d" % i)  # , end=' ')
            eval(feed)
        if (i + 1) % 7000 == 0:
            print("l_rate*=0.1")
            sess.run(tf.assign(l_rate, l_rate * 0.1))
    print("Train")
    for i in range(100000):
        batch_xs, batch_ys = speech.train.next_batch(100)
        feed = {x: batch_xs, y_: batch_ys}
        if (i + 1) % 9000 == 0:
            sess.run(tf.assign(l_rate, l_rate * 0.3))
        encod_step.run(feed)  # alternating!
        speech_step.run(feed)
        train_step.run(feed)
        if i % 100 == 0:
            print("iteration %d" % i)  # , end=' ')
            eval(feed)
def testReuseVars(self):
    height, width = 3, 3
    with self.test_session() as sess:
        image_shape = (10, height, width, 3)
        image_values = np.random.rand(*image_shape)
        expected_mean = np.mean(image_values, axis=(0, 1, 2))
        expected_var = np.var(image_values, axis=(0, 1, 2))
        images = tf.constant(image_values, shape=image_shape, dtype=tf.float32)
        output = tf.contrib.layers.batch_norm(images, decay=0.1, is_training=False)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        self.assertEquals(update_ops, [])
        # Initialize all variables
        sess.run(tf.initialize_all_variables())
        moving_mean = tf.contrib.framework.get_variables(
            'BatchNorm/moving_mean')[0]
        moving_variance = tf.contrib.framework.get_variables(
            'BatchNorm/moving_variance')[0]
        mean, variance = sess.run([moving_mean, moving_variance])
        # After initialization moving_mean == 0 and moving_variance == 1.
        self.assertAllClose(mean, [0] * 3)
        self.assertAllClose(variance, [1] * 3)
        # Simulate assignment from saver restore.
        init_assigns = [tf.assign(moving_mean, expected_mean),
                        tf.assign(moving_variance, expected_var)]
        sess.run(init_assigns)
        for _ in range(10):
            sess.run([output], {images: np.random.rand(*image_shape)})
        mean = moving_mean.eval()
        variance = moving_variance.eval()
        # Although we feed different images, the moving_mean and moving_variance
        # shouldn't change.
        self.assertAllClose(mean, expected_mean)
        self.assertAllClose(variance, expected_var)
def fit(self, xs, ys):
    sess = tf.get_default_session()
    if self._normalize_inputs:
        # recompute normalizing constants for inputs
        sess.run([
            tf.assign(self._x_mean_var, np.mean(xs, axis=0, keepdims=True)),
            tf.assign(self._x_std_var, np.std(xs, axis=0, keepdims=True) + 1e-8),
        ])
    if self._normalize_outputs:
        # recompute normalizing constants for outputs
        sess.run([
            tf.assign(self._y_mean_var, np.mean(ys, axis=0, keepdims=True)),
            tf.assign(self._y_std_var, np.std(ys, axis=0, keepdims=True) + 1e-8),
        ])
    if self._use_trust_region:
        old_means, old_log_stds = self._f_pdists(xs)
        inputs = [xs, ys, old_means, old_log_stds]
    else:
        inputs = [xs, ys]
    loss_before = self._optimizer.loss(inputs)
    if self._name:
        prefix = self._name + "_"
    else:
        prefix = ""
    logger.record_tabular(prefix + 'LossBefore', loss_before)
    self._optimizer.optimize(inputs)
    loss_after = self._optimizer.loss(inputs)
    logger.record_tabular(prefix + 'LossAfter', loss_after)
    if self._use_trust_region:
        logger.record_tabular(prefix + 'MeanKL',
                              self._optimizer.constraint_val(inputs))
    logger.record_tabular(prefix + 'dLoss', loss_before - loss_after)
def parallel_acc_by_tags(model, sess, max_parallel_calcs, data_folder, read_func,
                         from_file=None, data_set="test", feature="images",
                         orientations=None):
    total_images = 0
    if orientations is None:
        orientations = [0, 90, 180, 270]
    images, labels, tags = input_pipeline(data_folder, max_parallel_calcs,
                                          data_set=data_set, feature=feature,
                                          num_images=None, binary_file=False,
                                          orientations=orientations,
                                          from_file=from_file, num_epochs=1)
    incorrect_images_list = tf.Variable([], dtype=tf.string, trainable=False,
                                        name="Incorrect_images")
    adder_image_names = tf.placeholder(dtype=tf.string, shape=[None],
                                       name="Adder_images")
    new_incorrect_images_list = tf.concat(0, [incorrect_images_list,
                                              adder_image_names])
    add_incorrect_images = tf.assign(incorrect_images_list,
                                     new_incorrect_images_list,
                                     use_locking=True, validate_shape=False)
    incorrect_labels_list = tf.Variable([], dtype=tf.int32, trainable=False,
                                        name="Incorrect_image_labels")
    adder_image_labels = tf.placeholder(dtype=tf.int32, shape=[None],
                                        name="Adder_image_labels")
    new_incorrect_labels_list = tf.concat(0, [incorrect_labels_list,
                                              adder_image_labels])
    add_incorrect_labels = tf.assign(incorrect_labels_list,
                                     new_incorrect_labels_list,
                                     use_locking=True, validate_shape=False)
    init_ops = tf.group(tf.local_variables_initializer(),
                        tf.global_variables_initializer())
    sess.run(init_ops)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    steps = 0
    try:
        print("Checking Accuracy")
        while not coord.should_stop():
            steps += 1
            raw_imgs_list, labels_list, tags_list = sess.run([images, labels, tags])
            imgs_list = read_func(raw_imgs_list)
            preds = sess.run(model.correct_predictions,
                             feed_dict={model.inputs: imgs_list,
                                        model.testy: labels_list,
                                        model.keep_probs: 1})
            total_images += len(preds)
            incorrect_indices = np.where(preds == 0)
            # Uses locking so we do not lose any incorrect classifications
            sess.run(add_incorrect_images,
                     feed_dict={adder_image_names: tags_list[incorrect_indices]})
            sess.run(add_incorrect_labels,
                     feed_dict={adder_image_labels: labels_list[incorrect_indices]})
            if steps % 100 == 0:
                print("Calculated " + str(steps * max_parallel_calcs) + " files")
    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        # When done, ask the threads to stop.
        coord.request_stop()
        coord.join(threads)
    inc_name = sess.run(incorrect_images_list)
    inc_label = sess.run(incorrect_labels_list)
    print("Correct classifications: " + str(total_images - len(inc_name)))
    print("Total images: " + str(total_images))
    print("Accuracy: " + str((total_images - len(inc_name)) / total_images))
    with open(os.path.join(data_folder, "incorrect.txt"), 'w') as f:
        for i in range(len(inc_name)):
            f.write(os.path.join(data_folder, inc_name[i].decode('utf-8')) +
                    ', ' + str(inc_label[i] * 90) + '\n')
    sess.close()
def style_transfer_train(loss, img_var, initial_lr=3.0, decayed_lr=0.1,
                         decay_lr_at=180, max_iter=200, print_every=50):
    # Create and initialize the Adam optimizer
    lr_var = tf.Variable(initial_lr, name="lr")
    # Create train_op that updates the generated image when run
    with tf.variable_scope("optimizer") as opt_scope:
        train_op = tf.train.AdamOptimizer(lr_var).minimize(loss, var_list=[img_var])
    # Initialize the generated image and optimization variables
    opt_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=opt_scope.name)
    sess.run(tf.variables_initializer([lr_var, img_var] + opt_vars))
    # Create an op that will clamp the image values when run
    clamp_image_op = tf.assign(img_var, tf.clip_by_value(img_var, -1.5, 1.5))

    imgs_in_process = []  # Hardcoded handcrafted
    for t in range(max_iter):
        # Take an optimization step to update img_var
        sess.run(train_op)
        if t < decay_lr_at:
            sess.run(clamp_image_op)
        if t == decay_lr_at:
            sess.run(tf.assign(lr_var, decayed_lr))
        if t % print_every == 0:
            print("train step: %d" % t)
            img = sess.run(img_var)
            imgs_in_process.append(img[0])
    print("train step: %d" % t)
    final_img = sess.run(img_var)[0]
    return imgs_in_process, final_img
def bn_layer(inputs, is_training, name='BatchNorm', moving_decay=0.9, eps=1e-5):
    shape = inputs.shape
    assert len(shape) in [2, 4]
    param_shape = shape[-1]

    gamma = tf.Variable(tf.ones(param_shape), name='gamma')
    beta = tf.Variable(tf.zeros(param_shape), name='beta')
    mean = tf.Variable(tf.ones(param_shape), trainable=False, name='mean')
    var = tf.Variable(tf.ones(param_shape), trainable=False, name='var')

    tf.add_to_collection('l2_losses', tf.contrib.layers.l2_regularizer(lambda1)(gamma))
    tf.add_to_collection('l2_losses', tf.contrib.layers.l2_regularizer(lambda1)(beta))
    tf.add_to_collection('l2_losses', tf.contrib.layers.l2_regularizer(lambda1)(mean))
    tf.add_to_collection('l2_losses', tf.contrib.layers.l2_regularizer(lambda1)(var))

    if is_training:
        batch_mean, batch_var = tf.nn.moments(inputs, [0, 1, 2], name='moments')
        mean = tf.assign(mean, batch_mean)
        var = tf.assign(var, batch_var)
        # The mean*1e-10 / var*1e-10 terms force the assign ops above to run
        # as a data dependency of the normalization.
        return tf.nn.batch_normalization(inputs, batch_mean + mean * 1e-10,
                                         batch_var + var * 1e-10, gamma, beta, eps)
    else:
        return tf.nn.batch_normalization(inputs, mean, var, gamma, beta, eps)
def batch_normalization(self, input_data, name, scale_offset=True, relu=False):
    with tf.variable_scope(name) as scope:
        shape = [input_data.get_shape()[-1]]
        pop_mean = tf.get_variable("mean", shape,
                                   initializer=tf.constant_initializer(0.0),
                                   trainable=False)
        pop_var = tf.get_variable("variance", shape,
                                  initializer=tf.constant_initializer(1.0),
                                  trainable=False)
        epsilon = 1e-4
        decay = 0.999
        if scale_offset:
            scale = tf.get_variable("scale", shape,
                                    initializer=tf.constant_initializer(1.0))
            offset = tf.get_variable("offset", shape,
                                     initializer=tf.constant_initializer(0.0))
        else:
            scale, offset = (None, None)
        if self.is_training:
            batch_mean, batch_var = tf.nn.moments(input_data, [0, 1, 2])
            train_mean = tf.assign(pop_mean,
                                   pop_mean * decay + batch_mean * (1 - decay))
            train_var = tf.assign(pop_var,
                                  pop_var * decay + batch_var * (1 - decay))
            with tf.control_dependencies([train_mean, train_var]):
                output = tf.nn.batch_normalization(input_data, batch_mean,
                                                   batch_var, offset, scale,
                                                   epsilon, name=name)
        else:
            output = tf.nn.batch_normalization(input_data, pop_mean, pop_var,
                                               offset, scale, epsilon, name=name)
        if relu:
            output = tf.nn.relu(output)
        return output
def update_parameters(mu, sigma, best_params):
    new_mu = tf.reduce_mean(best_params, 0)
    mu_ass = tf.assign(mu, new_mu)
    diff = tf.squared_difference(best_params, new_mu)
    std = tf.sqrt(tf.reduce_mean(diff, 0))
    sigma_ass = tf.assign(sigma, std)
    return mu_ass, sigma_ass
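# Minimal driver sketch for update_parameters (assumed context, e.g. one
# iteration of a cross-entropy-method style search; `dim` and the elite
# batch below are illustrative, not from the source).
import numpy as np
import tensorflow as tf

dim = 10
mu = tf.Variable(tf.zeros([dim]))
sigma = tf.Variable(tf.ones([dim]))
best_params = tf.placeholder(tf.float32, [None, dim])
mu_ass, sigma_ass = update_parameters(mu, sigma, best_params)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    elites = np.random.randn(20, dim).astype(np.float32)
    sess.run([mu_ass, sigma_ass], {best_params: elites})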
def run_tf_simulation(self, c_in, h_in, timesteps=100, dt=0.005):
    r_e = tf.Variable(tf.zeros([self.N_pairs, self.N_pairs]))
    r_i = tf.Variable(tf.zeros([self.N_pairs, self.N_pairs]))

    W_EE = tf.placeholder(tf.float32)
    W_EI = tf.placeholder(tf.float32)
    W_IE = tf.placeholder(tf.float32)
    W_II = tf.placeholder(tf.float32)
    k = tf.placeholder(tf.float32)
    n_E = tf.placeholder(tf.float32)
    n_I = tf.placeholder(tf.float32)
    tau_E = tf.placeholder(tf.float32)
    tau_I = tf.placeholder(tf.float32)
    c0 = tf.constant(c_in)
    h0 = tf.constant(h_in)

    # Compile functions:
    I_E = c0 * h0 \
        + tf.transpose(tf.reshape(tf.reduce_sum(W_EE * r_e, [1, 2]), [75, 75])) \
        - tf.transpose(tf.reshape(tf.reduce_sum(W_EI * r_i, [1, 2]), [75, 75]))
    I_I = c0 * h0 \
        + tf.transpose(tf.reshape(tf.reduce_sum(W_IE * r_e, [1, 2]), [75, 75])) \
        - tf.transpose(tf.reshape(tf.reduce_sum(W_II * r_i, [1, 2]), [75, 75]))

    I_thresh_E = tf.maximum(0., I_E)
    I_thresh_I = tf.maximum(0., I_I)

    r_SS_E = k * tf.pow(I_thresh_E, n_E)
    r_SS_I = k * tf.pow(I_thresh_I, n_I)

    rE_out = r_e + dt * (-r_e + r_SS_E) / tau_E
    rI_out = r_i + dt * (-r_i + r_SS_I) / tau_I

    update_rE = tf.assign(r_e, rE_out)
    update_rI = tf.assign(r_i, rI_out)

    init = tf.initialize_all_variables()

    rE = 0
    rI = 0
    fd = {W_EE: self.W_EE.astype(np.float32),
          W_EI: self.W_EI.astype(np.float32),
          W_IE: self.W_IE.astype(np.float32),
          W_II: self.W_II.astype(np.float32),
          k: self.k.astype(np.float32),
          n_E: self.n_E.astype(np.float32),
          n_I: self.n_I.astype(np.float32),
          tau_E: self.tau_E.astype(np.float32),
          tau_I: self.tau_I.astype(np.float32)}
    with tf.Session() as sess:
        sess.run(init, feed_dict=fd)
        for t in range(timesteps):
            # run the simulation
            sess.run([update_rE, update_rI], feed_dict=fd)
        # fetch the rates
        rE = sess.run([r_e], feed_dict=fd)
        rI = sess.run([r_i], feed_dict=fd)
    return rE, rI
def _cached_copy(self, var, name):
    """Helper function to create a worker cached copy of a Variable.

    Args:
      var: Variable or list of Variable to cache. If a list, the items are
        concatenated along dimension 0 to get the cached entry.
      name: name of cached variable.

    Returns:
      Tuple consisting of following three entries:
      cache: the new transient Variable.
      cache_init: op to initialize the Variable
      cache_reset: op to reset the Variable to some default value
    """
    if var is None:
        return None, None, None
    else:
        cache = WALSModel._transient_var(name)
        with ops.colocate_with(cache):
            if isinstance(var, list):
                assert var
                if len(var) == 1:
                    var = var[0]
                else:
                    var = tf.concat(0, var)
            cache_init = tf.assign(cache, var, validate_shape=False)
            cache_reset = tf.assign(cache, 1.0, validate_shape=False)
        return cache, cache_init, cache_reset
def __init__(self, inputs, size, is_training, sess, parForTarget=None, bn_param=None):
    self.sess = sess
    self.scale = tf.Variable(tf.random_uniform([size], 0.9, 1.1))
    self.beta = tf.Variable(tf.random_uniform([size], -0.03, 0.03))
    self.pop_mean = tf.Variable(tf.random_uniform([size], -0.03, 0.03),
                                trainable=False)
    self.pop_var = tf.Variable(tf.random_uniform([size], 0.9, 1.1),
                               trainable=False)
    self.batch_mean, self.batch_var = tf.nn.moments(inputs, [0])

    self.train_mean = tf.assign(
        self.pop_mean,
        self.pop_mean * decay + self.batch_mean * (1 - decay))
    self.train_var = tf.assign(
        self.pop_var,
        self.pop_var * decay + self.batch_var * (1 - decay))

    def training():
        return tf.nn.batch_normalization(inputs, self.batch_mean, self.batch_var,
                                         self.beta, self.scale, 0.0000001)

    def testing():
        return tf.nn.batch_normalization(inputs, self.pop_mean, self.pop_var,
                                         self.beta, self.scale, 0.0000001)

    if parForTarget is not None:
        self.parForTarget = parForTarget
        self.updateScale = self.scale.assign(
            self.scale * (1 - TAU) + self.parForTarget.scale * TAU)
        self.updateBeta = self.beta.assign(
            self.beta * (1 - TAU) + self.parForTarget.beta * TAU)
        self.updateTarget = tf.group(self.updateScale, self.updateBeta)

    self.bnorm = tf.cond(is_training, training, testing)
def running_mean(cost, tag_name, batch_size=1):
    with tf.name_scope("running_mean_" + tag_name):
        with tf.variable_scope(tag_name):
            cost_sum = tf.get_variable(
                "cost_sum",
                initializer=tf.zeros_initializer,
                dtype=tf.float64,
                shape=(),
                collections=[tf.GraphKeys.LOCAL_VARIABLES],
                trainable=False)
            batches = tf.get_variable(
                "cost_num_batches",
                initializer=tf.zeros_initializer,
                dtype=tf.int32,
                shape=(),
                collections=[tf.GraphKeys.LOCAL_VARIABLES],
                trainable=False)

        cost_add = tf.assign_add(cost_sum, tf.cast(cost, dtype=tf.float64))
        batches_add = tf.assign_add(batches, batch_size)
        update_cost_mean = tf.group(cost_add, batches_add)

        reset_batches = tf.assign(batches, 0)
        reset_cost_sum = tf.assign(cost_sum, 0.0)
        reset_cost_mean = tf.group(reset_batches, reset_cost_sum)

        mean_cost = tf.divide(cost_sum, tf.cast(batches, dtype=tf.float64))
        train_loss_summary = tf.summary.scalar(tag_name, mean_cost)

    return reset_cost_mean, update_cost_mean, train_loss_summary
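# Hedged usage sketch for running_mean: accumulate once per batch, emit the
# summary, then reset. `cost_ph` is an illustrative name; note the counters
# are registered in LOCAL_VARIABLES, so the local initializer is needed.
import tensorflow as tf

cost_ph = tf.placeholder(tf.float32, [], name='cost_ph')
reset_op, update_op, summary_op = running_mean(cost_ph, 'train_loss')

with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    for c in [1.0, 2.0, 3.0]:
        sess.run(update_op, {cost_ph: c})   # once per batch
    summ = sess.run(summary_op)             # serialized mean over the 3 batches
    sess.run(reset_op)                      # start the next epoch at zero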
def if_train():
    # compute mean across these axes (all but channels)
    batch_mean, batch_var = tf.nn.moments(inputs, axes=[0, 1, 2])
    # Exponential Mov. Avg. Decay (compute moving average of population,
    # update as batches are seen.)
    train_mean = tf.assign(pop_mean, pop_mean * decay + batch_mean * (1 - decay))
    train_var = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay))
    # makes sure the moving averages are updated during training (absent below:)
    with tf.control_dependencies([train_mean, train_var]):
        return tf.identity(batch_mean), tf.identity(batch_var)
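# Hedged sketch of the test-time branch this closure pairs with. The names
# it closes over (inputs, pop_mean, pop_var, decay) normally come from the
# enclosing layer; the definitions below are assumptions for illustration.
import tensorflow as tf

inputs = tf.placeholder(tf.float32, [None, 8, 8, 16])
is_training = tf.placeholder(tf.bool, [])
decay = 0.99
pop_mean = tf.Variable(tf.zeros([16]), trainable=False)
pop_var = tf.Variable(tf.ones([16]), trainable=False)
beta = tf.Variable(tf.zeros([16]))
gamma = tf.Variable(tf.ones([16]))

def if_test():
    # at test time, read the stored population statistics
    return pop_mean, pop_var

mean, var = tf.cond(is_training, if_train, if_test)
out = tf.nn.batch_normalization(inputs, mean, var, beta, gamma, 1e-3)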
def getUpdatesForBnRollingAverage(self):
    # This function or something similar should stay, even if I clean the
    # BN rolling average.
    if self._appliedBnInLayer:
        return [tf.assign(ref=self._sharedNewMu_B, value=self._newMu_B,
                          validate_shape=True),
                tf.assign(ref=self._sharedNewVar_B, value=self._newVar_B,
                          validate_shape=True)]
    else:
        return []
def overflow_case():
    new_scale_val = tf.clip_by_value(self.scale / self.step_factor,
                                     self.scale_min, self.scale_max)
    scale_assign = tf.assign(self.scale, new_scale_val)
    overflow_iter_assign = tf.assign(self.last_overflow_iteration, self.iteration)
    with tf.control_dependencies([scale_assign, overflow_iter_assign]):
        return tf.identity(self.scale)
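# Hedged sketch of how a branch like overflow_case is typically selected
# inside a dynamic loss-scaling update. `has_overflow` and the no-overflow
# branch below are assumptions, not the source.
def no_overflow_case():
    # leave (or later grow) the scale when no overflow was detected
    return tf.identity(self.scale)

updated_scale = tf.cond(has_overflow, overflow_case, no_overflow_case)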
def __init__(self, is_training, config, input_):
    self._input = input_

    batch_size = input_.batch_size
    num_steps = input_.num_steps
    size = config.hidden_size
    vocab_size = config.vocab_size

    def rnn_cell():
        # With the latest TensorFlow source code (as of Mar 27, 2017),
        # the BasicLSTMCell will need a reuse parameter which is unfortunately not
        # defined in TensorFlow 1.0. To maintain backwards compatibility, we add
        # an argument check here:
        if 'reuse' in inspect.getargspec(
                tf.contrib.rnn.BasicRNNCell.__init__).args:
            return tf.contrib.rnn.BasicRNNCell(
                size, reuse=tf.get_variable_scope().reuse)
        else:
            return tf.contrib.rnn.BasicRNNCell(size)

    attn_cell = rnn_cell
    if is_training and config.keep_prob < 1:
        def attn_cell():
            return tf.contrib.rnn.DropoutWrapper(
                rnn_cell(), output_keep_prob=config.keep_prob)

    self.cell = tf.contrib.rnn.MultiRNNCell(
        [attn_cell() for _ in range(config.num_layers)], state_is_tuple=True)

    self._initial_state = self.cell.zero_state(batch_size, data_type())
    self._initial_state_single = self.cell.zero_state(1, data_type())

    self.initial = tf.reshape(
        tf.stack(axis=0, values=self._initial_state_single),
        [config.num_layers, 1, size], name="test_initial_state")

    # first implement the less efficient version
    test_word_in = tf.placeholder(tf.int32, [1, 1], name="test_word_in")

    state_placeholder = tf.placeholder(
        tf.float32, [config.num_layers, 1, size], name="test_state_in")
    # unpacking the input state context
    l = tf.unstack(state_placeholder, axis=0)
    test_input_state = tuple([l[idx] for idx in range(config.num_layers)])

    with tf.device("/cpu:0"):
        self.embedding = tf.get_variable(
            "embedding", [vocab_size, size], dtype=data_type())
        inputs = tf.nn.embedding_lookup(self.embedding, input_.input_data)
        test_inputs = tf.nn.embedding_lookup(self.embedding, test_word_in)

    # test time
    with tf.variable_scope("RNN"):
        (test_cell_output, test_output_state) = self.cell(
            test_inputs[:, 0, :], test_input_state)

    test_state_out = tf.reshape(
        tf.stack(axis=0, values=test_output_state),
        [config.num_layers, 1, size], name="test_state_out")
    test_cell_out = tf.reshape(test_cell_output, [1, size], name="test_cell_out")
    # above is the first part of the graph for test
    #   test-word-in
    #                 > ---- > test-state-out
    #   test-state-in        > test-cell-out
    #
    # below is the 2nd part of the graph for test
    #   test-word-out
    #                 > prob(word | test-word-out)
    #   test-cell-in
    test_word_out = tf.placeholder(tf.int32, [1, 1], name="test_word_out")
    cellout_placeholder = tf.placeholder(tf.float32, [1, size], name="test_cell_in")

    softmax_w = tf.get_variable("softmax_w", [size, vocab_size], dtype=data_type())
    softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=data_type())

    test_logits = tf.matmul(cellout_placeholder, softmax_w) + softmax_b
    test_softmaxed = tf.nn.log_softmax(test_logits)
    p_word = test_softmaxed[0, test_word_out[0, 0]]
    test_out = tf.identity(p_word, name="test_out")

    if is_training and config.keep_prob < 1:
        inputs = tf.nn.dropout(inputs, config.keep_prob)

    # Simplified version of models/tutorials/rnn/rnn.py's rnn().
    # This builds an unrolled LSTM for tutorial purposes only.
    # In general, use the rnn() or state_saving_rnn() from rnn.py.
    #
    # The alternative version of the code below is:
    #
    # inputs = tf.unstack(inputs, num=num_steps, axis=1)
    # outputs, state = tf.contrib.rnn.static_rnn(
    #     cell, inputs, initial_state=self._initial_state)
    outputs = []
    state = self._initial_state
    with tf.variable_scope("RNN"):
        for time_step in range(num_steps):
            if time_step > -1:
                tf.get_variable_scope().reuse_variables()
            (cell_output, state) = self.cell(inputs[:, time_step, :], state)
            outputs.append(cell_output)

    output = tf.reshape(tf.stack(axis=1, values=outputs), [-1, size])
    logits = tf.matmul(output, softmax_w) + softmax_b
    loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
        [logits],
        [tf.reshape(input_.targets, [-1])],
        [tf.ones([batch_size * num_steps], dtype=data_type())])
    self._cost = cost = tf.reduce_sum(loss) / batch_size
    self._final_state = state

    if not is_training:
        return

    self._lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                      config.max_grad_norm)
    optimizer = tf.train.MomentumOptimizer(self._lr, 0.9)
    self._train_op = optimizer.apply_gradients(
        zip(grads, tvars),
        global_step=tf.contrib.framework.get_or_create_global_step())

    self._new_lr = tf.placeholder(tf.float32, shape=[], name="new_learning_rate")
    self._lr_update = tf.assign(self._lr, self._new_lr)
adder_node = a + b
# getting more complex by calling another function
add_and_triple = adder_node * 3.

# variables are not like constants
# their values can be assigned later
# they are initialized when we call tf.global_variables_initializer (P.S. line 48)
W = tf.Variable([.3], tf.float32)
bias = tf.Variable([-.3], tf.float32)
x = tf.placeholder(tf.float32)
linear_model = W * x + bias

# now in case we want to reassign the values to a variable
# we can make use of the assign fn. then we have to run sess like in line 55
fixW = tf.assign(W, [-1.])
fixb = tf.assign(bias, [1.])

# loss function i.e. squared difference between calculated input and expected input
y = tf.placeholder(tf.float32)
# tf.square just like np.square
squared_deltas = tf.square(linear_model - y)
# calculate sum across an axis. See https://www.tensorflow.org/api_docs/python/tf/reduce_sum
loss = tf.reduce_sum(squared_deltas)

# A session encapsulates the control and state of the TensorFlow runtime.
sess = tf.Session()
print(sess.run([node1, node2]))
print(sess.run([node3]))
print(sess.run(adder_node, {a: 3, b: 4.5}))
print(sess.run(adder_node, {a: [1, 3], b: [2, 4]}))
def __init__(self, inputs, outputs, updates=[]):
    self.inputs = list(inputs)
    self.outputs = list(outputs)
    with tf.control_dependencies(self.outputs):
        self.updates = [tf.assign(p, new_p) for (p, new_p) in updates]
def set_value(x, value):
    tf.assign(x, np.asarray(value)).op.run(session=_get_session())
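# Example use of set_value (illustrative, not from the source). Note that
# each call builds a fresh assign op in the graph, so for long-running
# loops a cached, placeholder-based assign is the usual alternative.
import numpy as np
import tensorflow as tf

w = tf.Variable([0.0, 0.0])
_get_session().run(tf.global_variables_initializer())
set_value(w, [1.0, 2.0])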
def factorize(A, hyperparameters):
    # l1_regularizer_parameter = hyperparameters["l1_regularizer_parameter"]
    # dimension = hyperparameters["dimension"]
    # zero_out_threshold = hyperparameters["zero_out_threshold"]
    # lr = hyperparameters["lr"]
    # niters = hyperparameters["niters"]
    l1_regularizer_parameter = .0001
    zero_out_thresholds = [1e-7, 1e-7]
    lr = 3e-3
    niters = 10000
    intermediate_dimension = 500

    # Factorize A into matrices of shape shapes
    tf.reset_default_graph()
    shapes = [(A.shape[0], intermediate_dimension),
              (intermediate_dimension, A.shape[1])]
    variables = []
    for shape in shapes:
        # The identity is added when the factors are composed below (and when
        # the result is returned), so each variable holds only the small
        # random perturbation.
        variables.append(
            tf.Variable(tf.random_normal(shape, stddev=.001, dtype=tf.float32)))

    # Multiply the variables together
    to_optimize = variables[0] + tf.eye(*tuple(variables[0].get_shape().as_list()))
    for variable in variables[1:]:
        to_optimize = tf.matmul(
            to_optimize,
            tf.eye(*tuple(variable.get_shape().as_list())) + variable)
        # to_optimize = tf.matmul(to_optimize, variable)
    assert (tuple(to_optimize.get_shape().as_list()) == tuple(A.shape))

    # Construct the optimization
    target_placeholder = tf.placeholder(tf.float32, shape=A.shape)
    loss_frobenius_error = tf.norm(target_placeholder - to_optimize)

    # Add l1 loss
    l1_parameter_placeholder = tf.placeholder(dtype=tf.float32)
    l1_regularizer = tf.contrib.layers.l1_regularizer(scale=1.0, scope=None)
    regularization_penalty = tf.contrib.layers.apply_regularization(
        l1_regularizer, variables)
    loss = loss_frobenius_error + l1_parameter_placeholder * regularization_penalty
    # loss = loss_frobenius_error

    # Create opt
    lr_placeholder = tf.placeholder(dtype=tf.float32)
    opt = tf.train.GradientDescentOptimizer(learning_rate=lr_placeholder)
    minimize = opt.minimize(loss)

    # Zero out values below absolute threshold
    zero_ops = []
    with tf.control_dependencies([minimize]):
        for matrix, thresh in zip(variables, zero_out_thresholds):
            mask = tf.cast(
                tf.greater(tf.abs(matrix),
                           thresh * tf.ones_like(matrix, dtype=tf.float32)),
                tf.float32)
            zero_ops.append(tf.assign(matrix, tf.multiply(mask, matrix)))
    minimize_and_zero_out = tf.group(zero_ops)

    # Do optimization
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    results = []
    for i in range(niters):
        _, loss_materialized, loss_frobenius_error_materialized = sess.run(
            [minimize_and_zero_out, loss, loss_frobenius_error],
            feed_dict={
                target_placeholder: A,
                l1_parameter_placeholder: l1_regularizer_parameter,
                lr_placeholder: lr
            })
        if i % 100 == 0:
            # Also calculate sparsity
            variables_materialized = sess.run(variables)
            total_number_of_nnzs = sum(
                [np.count_nonzero(x) for x in variables_materialized])
            nnzs = [np.count_nonzero(x) for x in variables_materialized]
            print(
                "Loss: %g, Loss_frob_error: %g, # nnzs in factored matrices: %d, nnzs: %s"
                % (loss_materialized, loss_frobenius_error_materialized,
                   total_number_of_nnzs, str(nnzs)))
            results.append(
                (total_number_of_nnzs, loss_frobenius_error_materialized))

    all_results = {
        "hyperparameters": hyperparameters,
        "target_matrix": A,
        "results": results
    }
    vs = sess.run(variables)
    print([np.count_nonzero(v) for v in vs])
    return [(v + np.eye(*v.shape), np.count_nonzero(v)) for v in vs], all_results
def sparse_factorize(target_matrix, **hyperparameters):
    # Short-circuit: delegate to the successive factorization routine; the
    # code below it is unreachable as written.
    return successive_factorization(target_matrix)

    hyperparameter_defaults = {
        "l1_parameter": 0.05,  # Tune
        "l1_parameter_growth": 1,
        "grow_l1_every_n_iter": 1000,
        "intermediate_dimension": 2000,  # Tune
        "ntrain_iters": 2000,
        "n_matrices_to_factorize_into": 20,  # Tune
        "lr": 1e-2,
        "lr_decay": .995,
        "decay_lr_every_n_iter": 1000,
        "init_normal_stdev": .01,
        "zero_out_threshold": 8e-3,  # Tune
        "dense_factorize": False,
        "print_every": 100
    }
    hyperparameters = merge_dict(hyperparameters, hyperparameter_defaults)
    print("sparse_factorize: Using hyperparameters")
    print(hyperparameters)

    M, K = target_matrix.shape
    Z = hyperparameters["intermediate_dimension"]

    if hyperparameters["dense_factorize"]:
        n_mats = hyperparameters["n_matrices_to_factorize_into"]
        first_shape, last_shape = (M, Z * (n_mats - 2 + 1)), (Z, K)
        intermediate_shapes = []
        for i in range(n_mats - 2):
            intermediate_shapes.insert(0, (Z, (i + 1) * Z))
        to_optimize = [
            tf.Variable(tf.random_normal(
                shp, stddev=hyperparameters["init_normal_stdev"]) + tf.eye(*shp),
                        dtype=tf.float32)
            for shp in [first_shape] + intermediate_shapes + [last_shape]
        ]
        cur_matrix = to_optimize[0]
        for ind, matrix in enumerate(to_optimize[1:]):
            cur_matrix_shape = cur_matrix.get_shape().as_list()
            matrix_shape = matrix.get_shape().as_list()
            print(cur_matrix_shape[1], matrix_shape[0], matrix_shape[1], Z)
            print(ind, len(to_optimize))
            if ind != len(to_optimize[1:]) - 1:
                stacked = tf.concat([tf.eye(matrix_shape[1]), matrix], axis=0)
            else:
                stacked = matrix
            # cur_matrix = tf.matmul(cur_matrix, tf.eye(*tuple(stacked.get_shape().as_list())) + stacked)
            cur_matrix = tf.matmul(cur_matrix, stacked)
    else:
        # Construct graph
        # Construct variables to optimize
        to_optimize = [
            tf.Variable(tf.random_normal(
                (M, Z), stddev=hyperparameters["init_normal_stdev"]) + tf.eye(M, Z),
                        dtype=tf.float32)
        ] + [
            tf.Variable(tf.random_normal(
                (Z, Z), stddev=hyperparameters["init_normal_stdev"]) + tf.eye(Z),
                        dtype=tf.float32)
            for i in range(hyperparameters["n_matrices_to_factorize_into"] - 2)
        ] + [
            tf.Variable(tf.random_normal(
                (Z, K), stddev=hyperparameters["init_normal_stdev"]) + tf.eye(Z, K),
                        dtype=tf.float32)
        ]
        cur_matrix = to_optimize[0]
        for matrix in to_optimize[1:]:
            cur_matrix = tf.matmul(cur_matrix, matrix)

    # Matrix placeholders
    target_placeholder = tf.placeholder(tf.float32, shape=target_matrix.shape)

    # Create loss
    raw_mse_loss = tf.norm(cur_matrix - target_placeholder)

    # Add regularization
    loss = raw_mse_loss
    l1_parameter_placeholder = tf.placeholder(dtype=tf.float32)
    l1_regularizer = tf.contrib.layers.l1_regularizer(
        # scale=hyperparameters["l1_parameter"], scope=None
        scale=1.0, scope=None)
    regularization_penalty = tf.contrib.layers.apply_regularization(
        l1_regularizer, to_optimize)
    loss += l1_parameter_placeholder * regularization_penalty

    # Create optimizer
    lr_placeholder = tf.placeholder(dtype=tf.float32)
    opt = tf.train.GradientDescentOptimizer(learning_rate=lr_placeholder)
    # opt = tf.train.AdamOptimizer()
    minimize = opt.minimize(loss)

    # Zero out values that are small
    zero_ops = []
    with tf.control_dependencies([minimize]):
        for matrix in to_optimize:
            mask = tf.cast(
                tf.greater(
                    tf.abs(matrix),
                    hyperparameters["zero_out_threshold"] *
                    tf.ones_like(matrix, dtype=tf.float32)),
                tf.float32)
            zero_ops.append(tf.assign(matrix, tf.multiply(mask, matrix)))
    minimize = tf.group(zero_ops)

    # Train
    cur_learning_rate = hyperparameters["lr"]
    log_data = []
    cur_l1 = hyperparameters["l1_parameter"]
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    for train_iter in range(hyperparameters["ntrain_iters"]):
        if train_iter % hyperparameters["decay_lr_every_n_iter"] == 0:
            cur_learning_rate *= hyperparameters["lr_decay"]
        if train_iter % hyperparameters["grow_l1_every_n_iter"] == 0:
            cur_l1 *= hyperparameters["l1_parameter_growth"]
        sampled_x = np.random.randn(target_matrix.shape[1], 1)
        total_loss_materialized, loss_mse_materialized, _ = (sess.run(
            [loss, raw_mse_loss, minimize],
            feed_dict={
                target_placeholder: target_matrix,
                lr_placeholder: cur_learning_rate,
                l1_parameter_placeholder: cur_l1
            }))
        if train_iter % hyperparameters["print_every"] == 0:
            # Compute sum of nnz elements
            n_nnz_elements = 0
            for matrix in to_optimize:
                matrix_materialized = sess.run(matrix)
                n_nnz_elements += np.count_nonzero(matrix_materialized)
            print(
                "Iteration %d of %d, Total Loss (+l1 penalty): %g; MSE loss: %g, nnz: %d"
                % (train_iter, hyperparameters["ntrain_iters"],
                   total_loss_materialized, loss_mse_materialized,
                   n_nnz_elements))
            log_data.append({
                "total_loss": total_loss_materialized,
                "loss_mse": loss_mse_materialized,
                "nnz": n_nnz_elements
            })

    # Compute number of nonzero elements
    n_nnz_elements = 0
    for matrix in to_optimize:
        matrix_materialized = sess.run(matrix)
        n_nnz_elements += np.count_nonzero(matrix_materialized)
    original_nnz_elements = np.count_nonzero(target_matrix)

    # Compute frobenius error
    product_of_matrices = sess.run(cur_matrix)
    frobenius_error = np.linalg.norm(product_of_matrices - target_matrix)
    target_matrix_error = np.linalg.norm(target_matrix)

    # Materialize actual matrices
    materialized_factorizations = []
    for matrix in to_optimize:
        matrix_materialized = sess.run(matrix)
        materialized_factorizations.append(matrix_materialized)

    # Return results
    results = {
        "factorized_matrices": materialized_factorizations,
        "product_of_matrices": product_of_matrices,
        "frobenius_error": frobenius_error,
        "original_nnz_elements": original_nnz_elements,
        "target_matrix": target_matrix,
        "n_nnz_elements": n_nnz_elements,
        "hyperparameter_setting": hyperparameters,
    }

    # Print summary
    print("sparse_factorize: Summary")
    print("-------------------------")
    print("original matrix nnz elements: %d" % original_nnz_elements)
    print("nnz elements: %d" % n_nnz_elements)
    print("frobenius error: %g" % frobenius_error)
    print("target matrix norm: %g" % target_matrix_error)
    print("nnzs of factorized matrices: %s" %
          str([np.count_nonzero(x) for x in materialized_factorizations]))
    return results
def create_model(inputs, targets):
    with tf.variable_scope("generator") as scope:
        out_channels = int(targets.get_shape()[-1])
        outputs = create_generator(inputs, out_channels)

    # create two copies of discriminator, one for real pairs and one for fake pairs
    # they share the same underlying variables
    with tf.name_scope("real_discriminator"):
        with tf.variable_scope("discriminator"):
            # 2x [batch, height, width, channels] => [batch, 30, 30, 1]
            predict_real = create_discriminator(inputs, targets)

    with tf.name_scope("fake_discriminator"):
        with tf.variable_scope("discriminator", reuse=True):
            # 2x [batch, height, width, channels] => [batch, 30, 30, 1]
            predict_fake = create_discriminator(inputs, outputs)

    with tf.name_scope("discriminator_loss"):
        # minimizing -tf.log will try to get inputs to 1
        # predict_real => 1
        # predict_fake => 0
        dloss_GAN = tf.reduce_mean(
            -(tf.log(tf.sigmoid(predict_real) + EPS) +
              tf.log(1 - tf.sigmoid(predict_fake) + EPS)))
        dloss_WGAN = tf.reduce_mean(predict_fake - predict_real)  # @luyi wgan critic loss
        # discriminator loss either from wgan or gan
        discrim_loss = tf.identity(dloss_WGAN) if a.wgan else tf.identity(dloss_GAN)

    with tf.name_scope("generator_loss"):
        # predict_fake => 1
        # abs(targets - outputs) => 0
        gloss_GAN = tf.reduce_mean(-tf.log(tf.sigmoid(predict_fake) + EPS))
        gloss_WGAN = tf.reduce_mean(-predict_fake)  # @luyi wgan generator loss
        # @luyi generator loss either from wgan or gan
        gen_loss = tf.identity(gloss_WGAN) if a.wgan else tf.identity(gloss_GAN)
        gloss_L1 = tf.reduce_mean(tf.abs(targets - outputs))

    with tf.name_scope("discriminator_train"):
        discrim_tvars = [var for var in tf.trainable_variables()
                         if var.name.startswith("discriminator")]
        print('Discriminator Variables:')
        for var in discrim_tvars:
            print(var.name)
        if not a.wgan:
            discrim_optim = tf.train.AdamOptimizer(a.lr, a.beta1)
        else:
            discrim_optim = tf.train.RMSPropOptimizer(a.lr)
        # @luyi wgan clip discriminator variables
        clipped_var = [tf.assign(var, tf.clip_by_value(var, -clip, clip))
                       for var in discrim_tvars]
        with tf.control_dependencies(clipped_var):  # @luyi clip variables first
            discrim_grads_and_vars = discrim_optim.compute_gradients(
                discrim_loss, var_list=discrim_tvars)
            discrim_train = discrim_optim.apply_gradients(discrim_grads_and_vars)

    with tf.name_scope("generator_train"):
        # with tf.control_dependencies([discrim_train]):
        gen_tvars = [var for var in tf.trainable_variables()
                     if var.name.startswith("generator")]
        print('Generator Variables:')
        for var in gen_tvars:
            print(var.name)
        if not a.wgan:
            gen_optim = tf.train.AdamOptimizer(a.lr, a.beta1)
        else:
            gen_optim = tf.train.RMSPropOptimizer(a.lr)  # @luyi optimizer
        gen_grads_and_vars = gen_optim.compute_gradients(
            gen_loss * a.gan_weight + gloss_L1 * a.l1_weight, var_list=gen_tvars)
        gen_train = gen_optim.apply_gradients(gen_grads_and_vars)

    ema = tf.train.ExponentialMovingAverage(decay=0.99)
    update_losses = ema.apply([dloss_WGAN, gloss_WGAN, dloss_GAN, gloss_GAN, gloss_L1])

    global_step = tf.contrib.framework.get_or_create_global_step()
    incr_global_step = tf.assign(global_step, global_step + 1)

    return Model(predict_real=predict_real,
                 predict_fake=predict_fake,
                 dloss_GAN=ema.average(dloss_GAN),
                 dloss_WGAN=ema.average(dloss_WGAN),
                 discrim_grads_and_vars=discrim_grads_and_vars,
                 gloss_GAN=ema.average(gloss_GAN),
                 gloss_WGAN=ema.average(gloss_WGAN),
                 gloss_L1=ema.average(gloss_L1),
                 gen_grads_and_vars=gen_grads_and_vars,
                 outputs=outputs,
                 update_losses=update_losses,
                 gen_train=gen_train,
                 discrim_train=discrim_train,
                 incr_global_step=incr_global_step,
                 gen_loss=gen_loss,
                 discrim_loss=discrim_loss)
num_layers = 2
poetrys = Poetry()
words_size = len(poetrys.word_to_id)

inputs = tf.placeholder(tf.int32, [batch_size, None])
targets = tf.placeholder(tf.int32, [batch_size, None])
keep_prob = tf.placeholder(tf.float32, name='keep_prob')

model = poetryModel()
logits, probs, initial_state, last_state = model.create_model(
    inputs, batch_size, rnn_size, words_size, num_layers, True, keep_prob)
loss = model.loss_model(words_size, targets, logits)
learning_rate = tf.Variable(0.0, trainable=False)
optimizer = model.optimizer_model(loss, learning_rate)
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.assign(learning_rate, 0.002 * 0.97))
    next_state = sess.run(initial_state)
    step = 0
    while True:
        x_batch, y_batch = poetrys.next_batch(batch_size)
        feed = {inputs: x_batch,
                targets: y_batch,
                initial_state: next_state,
                keep_prob: 0.5}
        train_loss, _, next_state = sess.run([loss, optimizer, last_state],
                                             feed_dict=feed)
        print("step:%d loss:%f" % (step, train_loss))
        step += 1
        if step > 40000:
            break
def increment_total_steps(self):
    self.sess.run(tf.assign(self.total_steps, self.total_steps + 1))
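# A build-once variant (sketch, not the source): calling tf.assign inside the
# method above adds a new op to the graph on every call, so the graph grows
# without bound; creating the increment op once in __init__ avoids that.
import tensorflow as tf

class StepCounterSketch(object):
    def __init__(self, sess):
        self.sess = sess
        self.total_steps = tf.Variable(0, trainable=False, name='total_steps')
        self._increment_op = tf.assign_add(self.total_steps, 1)  # built once

    def increment_total_steps(self):
        self.sess.run(self._increment_op)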
def build_model(self):
    # placeholders
    self.u = tf.placeholder(tf.int32, [None])            # user idx [B]
    self.hist_i = tf.placeholder(tf.int32, [None, None]) # history clicks [B, T]
    self.sl = tf.placeholder(tf.int32, [None])           # history len [B]
    self.last = tf.placeholder(tf.int32, [None])         # last click [B]
    self.basic = tf.placeholder(tf.float32, [None, None])  # user basic feature [B, basic_size]
    self.sub_sample = tf.placeholder(tf.int32, [None, None])  # softmax layer (pos_click, neg_list) [B, sub_size]
    self.y = tf.placeholder(tf.float32, [None, None])    # label one-hot [B]
    self.lr = tf.placeholder(tf.float64, [])

    # embedding variables
    item_emb_w = tf.get_variable("item_emb_w", [self.item_count, self.embedding_size])
    item_b = tf.get_variable("item_b", [self.item_count],
                             initializer=tf.constant_initializer(0.0))
    brand_emb_w = tf.get_variable("brand_emb_w", [self.brand_count, self.embedding_size])
    msort_emb_w = tf.get_variable("msort_emb_w", [self.msort_count, self.embedding_size])

    brand_list = tf.convert_to_tensor(self.brand_list, dtype=tf.int32)
    msort_list = tf.convert_to_tensor(self.msort_list, dtype=tf.int32)

    # history sequence
    hist_b = tf.gather(brand_list, self.hist_i)
    hist_m = tf.gather(msort_list, self.hist_i)
    h_emb = tf.concat([tf.nn.embedding_lookup(item_emb_w, self.hist_i),
                       tf.nn.embedding_lookup(brand_emb_w, hist_b),
                       tf.nn.embedding_lookup(msort_emb_w, hist_m)], axis=2)

    # history mask
    mask = tf.sequence_mask(self.sl, tf.shape(h_emb)[1], dtype=tf.float32)  # [B, T]
    mask = tf.expand_dims(mask, -1)                   # [B, T, 1]
    mask = tf.tile(mask, [1, 1, tf.shape(h_emb)[2]])  # [B, T, 3*e]
    h_emb *= mask                                     # [B, T, 3*e]
    hist = tf.reduce_sum(h_emb, 1)                    # [B, 3*e]
    hist = tf.div(hist,
                  tf.cast(tf.tile(tf.expand_dims(self.sl, 1),
                                  [1, 3 * self.embedding_size]),
                          tf.float32))                # [B, 3*e]

    # last click
    last_b = tf.gather(brand_list, self.last)
    last_m = tf.gather(msort_list, self.last)
    l_emb = tf.concat([tf.nn.embedding_lookup(item_emb_w, self.last),
                       tf.nn.embedding_lookup(brand_emb_w, last_b),
                       tf.nn.embedding_lookup(msort_emb_w, last_m)], axis=1)

    # net input
    self.input = tf.concat([hist, l_emb], axis=-1)

    # dnn net
    bn = tf.layers.batch_normalization(inputs=self.input, name='b1')
    layer_1 = tf.layers.dense(bn, 1024, activation=tf.nn.relu, name='f1')
    layer_2 = tf.layers.dense(layer_1, 512, activation=tf.nn.relu, name='f2')
    layer_3 = tf.layers.dense(layer_2, 3 * self.embedding_size,
                              activation=tf.nn.relu, name='f3')

    # softmax
    if self.is_training:
        sa_b = tf.gather(brand_list, self.sub_sample)
        sa_m = tf.gather(msort_list, self.sub_sample)
        sample_w = tf.concat([tf.nn.embedding_lookup(item_emb_w, self.sub_sample),
                              tf.nn.embedding_lookup(brand_emb_w, sa_b),
                              tf.nn.embedding_lookup(msort_emb_w, sa_m)],
                             axis=2)  # [B, sample, 3*e]
        # sample_w = tf.nn.embedding_lookup(item_emb_w, self.sub_sample)
        sample_b = tf.nn.embedding_lookup(item_b, self.sub_sample)  # [B, sample]
        user_v = tf.expand_dims(layer_3, 1)                # [B, 1, 3*e]
        sample_w = tf.transpose(sample_w, perm=[0, 2, 1])  # [B, 3*e, sample]
        self.logits = tf.squeeze(tf.matmul(user_v, sample_w), axis=1) + sample_b

        # Step variable
        self.global_step = tf.Variable(0, trainable=False, name='global_step')
        self.global_epoch_step = tf.Variable(0, trainable=False,
                                             name='global_epoch_step')
        self.global_epoch_step_op = tf.assign(self.global_epoch_step,
                                              self.global_epoch_step + 1)
        '''
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=self.logits, labels=self.y)
        )
        '''
        self.yhat = tf.nn.softmax(self.logits)
        self.loss = tf.reduce_mean(-self.y * tf.log(self.yhat + 1e-24))

        trainable_params = tf.trainable_variables()
        self.opt = tf.train.GradientDescentOptimizer(learning_rate=self.lr)
        gradients = tf.gradients(self.loss, trainable_params)
        clip_gradients, _ = tf.clip_by_global_norm(gradients, 5)
        self.train_op = self.opt.apply_gradients(
            zip(clip_gradients, trainable_params), global_step=self.global_step)
    else:
        all_emb = tf.concat([item_emb_w,
                             tf.nn.embedding_lookup(brand_emb_w, brand_list),
                             tf.nn.embedding_lookup(msort_emb_w, msort_list)],
                            axis=1)
        self.logits = tf.matmul(layer_3, all_emb, transpose_b=True) + item_b
        self.output = tf.nn.softmax(self.logits)
    p1p2_weights = p1_weights * p2_weights
    p1p2_weights_upper = upper(p1p2_weights)
    Kw_xx_upper = K_xx * p1p2_weights_upper
    Kw_xy = K_xy * p1_weights
    mmd = (tf.reduce_sum(Kw_xx_upper) / num_combos_xx +
           tf.reduce_sum(K_yy_upper) / num_combos_yy -
           2 * tf.reduce_mean(Kw_xy))
    return mmd


###############################################################################
# Build model.
lr = tf.Variable(learning_rate_init, name='lr', trainable=False)
lr_update = tf.assign(lr, tf.maximum(lr * 0.5, 1e-8), name='lr_update')
z = tf.placeholder(tf.float32, shape=[batch_size, noise_dim], name='z')
z_sample = tf.placeholder(tf.float32, shape=[None, noise_dim], name='z_sample')
x = tf.placeholder(tf.float32, shape=[batch_size, data_dim], name='x')
x_weights = tf.placeholder(tf.float32, shape=[batch_size, 1], name='x_weights')

g, g_vars = generator(z, reuse=False)
g_sample, _ = generator(z_sample, reuse=True)
d_real, d_logit_real, d_vars = discriminator(x, reuse=False)
d_fake, d_logit_fake, _ = discriminator(g, reuse=True)

# Define losses.
mmd = compute_mmd_iw_median_of_means(x, g, x_weights)
g_loss = mmd
print(hypothesis.shape, Y.shape)

# diff
assert hypothesis.shape.as_list() == Y.shape.as_list()
diff = (hypothesis - Y)

# Back prop (chain rule)
d_l1 = diff
d_b = d_l1
d_w = tf.matmul(tf.transpose(X), d_l1)
print(X, d_l1, d_w)

# Updating network using gradients
learning_rate = 1e-6
step = [
    tf.assign(W, W - learning_rate * d_w),
    tf.assign(b, b - learning_rate * tf.reduce_mean(d_b)),
]

# 7. Running and testing the training process
RMSE = tf.reduce_mean(tf.square((Y - hypothesis)))

sess = tf.InteractiveSession()
init = tf.global_variables_initializer()
sess.run(init)

for i in range(10000):
    print(i, sess.run([step, RMSE], feed_dict={X: x_data, Y: y_data}))

print(sess.run(hypothesis, feed_dict={X: x_data}))
def main(args):
    """
    restores the source graph and (if they exist) checkpoints first,
    restores selected variables of the target graph with checkpoint weights
    (the selected variables of the target graph will be overwritten), and
    saves the new target graph with checkpoints
    """
    if args.sourcecheckpoint is None:
        source_ckpt_path = tf.train.latest_checkpoint(args.source)
    else:
        source_ckpt_path = join(args.source, args.sourcecheckpoint)

    if args.targetcheckpoint is None:
        target_ckpt_path = tf.train.latest_checkpoint(args.target)
    else:
        target_ckpt_path = join(args.target, args.targetcheckpoint)

    if args.compare:
        if target_ckpt_path is not None:
            print_compare(source_ckpt_path, target_ckpt_path)
        else:
            print("no target checkpoint present...")
        return  # exit

    graph = tf.Graph()
    with graph.as_default() as g:
        # create dummy data iterator
        tf.data.TFRecordDataset("").make_initializable_iterator()
        graph_path = join(args.target, "graph.meta")
        # import meta graph from target model
        print("importing meta graph {}".format(graph_path))
        tf.train.import_meta_graph(graph_path)

        with tf.Session(graph=g) as sess:
            sess.run(tf.global_variables_initializer())
            targetsaver = tf.train.Saver()
            # if target checkpoint exists restore variables first
            if target_ckpt_path is not None:
                print("target checkpoint {}".format(target_ckpt_path))
                targetsaver.restore(sess, target_ckpt_path)
                print("restoring variables from target checkpoint")
            global_step_op = tf.get_default_graph().get_operation_by_name(
                "global_step").outputs[0]
            samples_seen_op = tf.get_default_graph().get_operation_by_name(
                "samples_seen").outputs[0]
            # parse restore variables from the --variables flag or --scopes flag
            if args.variables is not None:
                vars_dict = get_variable_dictionary_by_variables(args.variables)
            elif args.scopes is not None:
                vars_dict = get_variable_dictionary_by_scope(args.scopes)
            else:
                vars_dict = get_variable_dictionary_by_valid_variables(
                    srccheckpoint=source_ckpt_path,
                    trgcheckpoint=target_ckpt_path)
            for var in sorted(vars_dict.keys()):
                print("restoring variable {}".format(var))
            sourcesaver = tf.train.Saver(var_list=vars_dict)
            print("restoring selected variables from source checkpoint {}".format(
                source_ckpt_path))
            sourcesaver.restore(sess, source_ckpt_path)
            step = sess.run(global_step_op)
            checkpoint = join(args.target, "model.ckpt")
            if args.reset:
                sess.run([tf.assign(global_step_op, 0),
                          tf.assign(samples_seen_op, 0)])
                step = 0
            if not args.dry:
                print("saving variables to {}".format(checkpoint))
                targetsaver.save(sess, checkpoint, global_step=step)
grid = 8 image = image[:h // grid * grid, :w // grid * grid, :] mask = mask[:h // grid * grid, :w // grid * grid, :] print('Shape of image: {}'.format(image.shape)) image = np.expand_dims(image, 0) mask = np.expand_dims(mask, 0) input_image = np.concatenate([image, mask], axis=2) sess_config = tf.ConfigProto() sess_config.gpu_options.allow_growth = True with tf.Session(config=sess_config) as sess: input_image = tf.constant(input_image, dtype=tf.float32) output = model.build_server_graph(input_image, config=config) output = (output + 1.) * 127.5 output = tf.reverse(output, [-1]) output = tf.saturate_cast(output, tf.uint8) # load pretrained model vars_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) assign_ops = [] for var in vars_list: vname = var.name from_name = vname var_value = tf.contrib.framework.load_variable( args.checkpoint_dir, from_name) assign_ops.append(tf.assign(var, var_value)) sess.run(assign_ops) print('Model loaded.') result = sess.run(output) cv2.imwrite(args.output, result[0][:, :, ::-1])
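# Note on the loading pattern above: since every variable is restored under
# its own name (from_name == vname), a plain Saver restore is an equivalent,
# simpler sketch -- assuming the checkpoint in args.checkpoint_dir stores the
# same variable names:
saver = tf.train.Saver(var_list=vars_list)
saver.restore(sess, tf.train.latest_checkpoint(args.checkpoint_dir))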
def __init__(self, args, session, updates=None): self.args = args self.sess = session # updates if not updates: updates = 0 self.updates = updates self.global_step = tf.get_variable( 'global_step', shape=(), dtype=tf.float32, initializer=tf.constant_initializer(updates), trainable=False) self.step = tf.assign_add(self.global_step, 1) # placeholders table = HashTable(TextFileIdTableInitializer( filename=os.path.join(args.output_dir, 'vocab.txt')), default_value=Vocab.unk()) self.q1_string = tf.placeholder(tf.string, [None, None], name='q1_str') self.q2_string = tf.placeholder(tf.string, [None, None], name='q2_str') self.q1 = tf.placeholder_with_default(table.lookup(self.q1_string), [None, None], name='q1') self.q2 = tf.placeholder_with_default(table.lookup(self.q2_string), [None, None], name='q2') self.q1_len = tf.placeholder(tf.int32, [None], name='q1_len') self.q2_len = tf.placeholder(tf.int32, [None], name='q2_len') self.y = tf.placeholder(tf.int32, [None], name='y') self.dropout_keep_prob = tf.placeholder(tf.float32, (), name='dropout_keep_prob') self.batchsize_a = tf.shape(self.q1_len)[0] self.batchsize_b = tf.shape(self.q2_len)[0] self.maxlen = tf.reduce_max(self.q1_len) self.maxlen = tf.maximum(self.maxlen, tf.reduce_max(self.q2_len)) q1_mask = tf.expand_dims(tf.sequence_mask(self.q1_len, maxlen=self.maxlen, dtype=tf.float32), dim=-1) q2_mask = tf.expand_dims(tf.sequence_mask(self.q2_len, maxlen=self.maxlen, dtype=tf.float32), dim=-1) devices = self.get_available_gpus() or ['/device:CPU:0'] if not args.multi_gpu: devices = devices[:1] if len(devices) == 1: splits = 1 else: splits = [tf.shape(self.q1)[0] // len(devices) ] * (len(devices) - 1) + [-1] # handle uneven split q1 = tf.split(self.q1, splits) q2 = tf.split(self.q2, splits) q1_mask = tf.split(q1_mask, splits) q2_mask = tf.split(q2_mask, splits) y = tf.split(self.y, splits) # network self.network = Network(args) # optimizer lr = tf.get_variable('lr', shape=(), dtype=tf.float32, initializer=tf.constant_initializer(args.lr), trainable=False) lr_next = tf.cond(self.global_step < args.lr_warmup_steps, true_fn=lambda: args.min_lr + (args.lr - args.min_lr) / max( 1, args.lr_warmup_steps) * self.global_step, false_fn=lambda: tf.maximum( args.min_lr, args.lr * args.lr_decay_rate**tf.floor( (self.global_step - args.lr_warmup_steps ) / args.lr_decay_steps))) tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, tf.assign(lr, lr_next, name='update_lr')) self.lr = lr self.opt = tf.train.AdamOptimizer(learning_rate=lr, beta1=args.beta1, beta2=args.beta2) # training graph tower_names = ['tower-{}'.format(i) for i in range(len(devices)) ] if len(devices) > 1 else [''] tower_logits = [] tower_grads = [] summaries = [] loss = 0 with tf.variable_scope(tf.get_variable_scope()): for i, device in enumerate(devices): with tf.device(device): with tf.name_scope(tower_names[i]) as scope: logits = self.network(q1[i], q2[i], q1_mask[i], q2_mask[i], self.dropout_keep_prob, batchsize_a=self.batchsize_a, batchsize_b=self.batchsize_b) tower_logits.append(logits) loss = self.get_loss(logits, y[i]) tf.get_variable_scope().reuse_variables() summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope) grads = self.opt.compute_gradients(loss) tower_grads.append(grads) gradients = [] variables = [] for grad_and_vars in zip(*tower_grads): if grad_and_vars[0][0] is None: msg = 'WARNING: trainable variable {} receives no grad.\n'.format( grad_and_vars[0][1].op.name) sys.stderr.write(msg) continue grad = tf.stack([g for g, _ in grad_and_vars]) grad = tf.reduce_mean(grad, 0) v 
= grad_and_vars[0][ 1] # use the first tower's pointer to the (shared) variable gradients.append(grad) variables.append(v) gradients, self.gnorm = tf.clip_by_global_norm(gradients, self.args.grad_clipping) update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): self.train_op = self.opt.apply_gradients(zip(gradients, variables)) logits = tf.concat(tower_logits, 0) self.prob = tf.nn.softmax(logits, dim=1, name='prob') self.pred = tf.argmax(input=logits, axis=1, name='pred') self.loss = tf.identity(loss, name='loss') summaries.append(tf.summary.scalar('training/lr', lr)) summaries.append(tf.summary.scalar('training/gnorm', self.gnorm)) summaries.append(tf.summary.scalar('training/loss', self.loss)) # add summary self.summary = tf.summary.merge(summaries) # saver self.saver = tf.train.Saver( [var for var in tf.global_variables() if 'Adam' not in var.name], max_to_keep=args.max_checkpoints)
def main(_): best_acc = 0 best_step = 0 best_acc_istrain = 0 best_step_istrain = 0 # We want to see all the logging messages for this tutorial. tf.logging.set_verbosity(tf.logging.INFO) # Start a new TensorFlow session. sess = tf.InteractiveSession() # Begin by making sure we have the training data we need. If you already have # training data of your own, use `--data_url= ` on the command line to avoid # downloading. model_settings = models.prepare_model_settings( len(new_features_input.prepare_words_list_my(FLAGS.wanted_words.split(','))), FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms, FLAGS.window_stride_ms, FLAGS.dct_coefficient_count) audio_processor = new_features_input.AudioProcessor( FLAGS.data_dir, FLAGS.silence_percentage, FLAGS.wanted_words.split(','), FLAGS.validation_percentage, FLAGS.testing_percentage) fingerprint_size = model_settings['fingerprint_size'] label_count = model_settings['label_count'] training_steps_list = list(map(int, FLAGS.how_many_training_steps.split(','))) learning_rates_list = list(map(float, FLAGS.learning_rate.split(','))) if len(training_steps_list) != len(learning_rates_list): raise Exception( '--how_many_training_steps and --learning_rate must be equal length ' 'lists, but are %d and %d long instead' % (len(training_steps_list), len(learning_rates_list))) ############################################## ############tensorflow modules########## fingerprint_input = tf.placeholder( tf.float32, [None, fingerprint_size], name='fingerprint_input') # ############ model creation ########## istrain = tf.placeholder(tf.bool, name='istrain') logits = models.create_model( fingerprint_input, model_settings, FLAGS.model_architecture, is_training=istrain) ############ model creation ########## # logits, dropout_prob= models.create_model( # fingerprint_input, # model_settings, # FLAGS.model_architecture, # is_training=True) # Define loss and optimizer ############ ground truth ########## ground_truth_input = tf.placeholder( tf.float32, [None, label_count], name='groundtruth_input') # Optionally we can add runtime checks to spot when NaNs or other symptoms of # numerical errors start occurring during training. control_dependencies = [] if FLAGS.check_nans: checks = tf.add_check_numerics_ops() control_dependencies = [checks] # Create the back propagation and training evaluation machinery in the graph.
############ cross-entropy computation ########## # with tf.name_scope('cross_entropy'): # cross_entropy_mean = tf.reduce_mean( # tf.nn.softmax_cross_entropy_with_logits( # labels=ground_truth_input, logits=logits)) + beta*loss_norm with tf.name_scope('cross_entropy'): cross_entropy_mean = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits( labels=ground_truth_input, logits=logits)) tf.summary.scalar('cross_entropy', cross_entropy_mean) ############ learning rate, accuracy, confusion matrix ########## # learning_rate_input learning-rate input (tf.placeholder) # train_step training op (optimizer) # predicted_indices predicted output indices # expected_indices expected (ground-truth) output indices # correct_prediction correct-prediction matrix # confusion_matrix confusion matrix # evaluation_step classification accuracy (per evaluation) # global_step global training step # increment_global_step op that increments the global training step learning_rate_input = tf.placeholder( tf.float32, [], name='learning_rate_input') update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): train_step = tf.train.AdamOptimizer( learning_rate_input).minimize(cross_entropy_mean) # with tf.name_scope('train'), tf.control_dependencies(control_dependencies): # learning_rate_input = tf.placeholder( # tf.float32, [], name='learning_rate_input') # # train_step = tf.train.GradientDescentOptimizer( # # learning_rate_input).minimize(cross_entropy_mean) # with tf.control_dependencies(update_ops): # train_step = tf.train.AdamOptimizer( # learning_rate_input).minimize(cross_entropy_mean) predicted_indices = tf.argmax(logits, 1) expected_indices = tf.argmax(ground_truth_input, 1) correct_prediction = tf.equal(predicted_indices, expected_indices) confusion_matrix = tf.confusion_matrix( expected_indices, predicted_indices, num_classes=label_count) evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) acc = tf.summary.scalar('accuracy', evaluation_step) global_step = tf.train.get_or_create_global_step() increment_global_step = tf.assign(global_step, global_step + 1) saver = tf.train.Saver(tf.global_variables(), max_to_keep=None) # number of checkpoint files to keep // default is 5 # Merge all the summaries and write them out to /tmp/retrain_logs (by default) merged_summaries = tf.summary.merge_all() validation_merged_summaries = tf.summary.merge([tf.get_collection(tf.GraphKeys.SUMMARIES,'accuracy'),tf.get_collection(tf.GraphKeys.SUMMARIES,'cross_entropy')]) test_summaries = tf.summary.merge([acc]) test_summaries_istrain = tf.summary.merge([tf.get_collection(tf.GraphKeys.SUMMARIES,'accuracy'),tf.get_collection(tf.GraphKeys.SUMMARIES,'cross_entropy')]) #test_summaries_istrain = tf.summary.merge([acc]) train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train', sess.graph) # validation_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/validation') test_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/test') test_istrain_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/test_istrain') tf.global_variables_initializer().run() start_step = 1 if FLAGS.start_checkpoint: models.load_variables_from_checkpoint(sess, FLAGS.start_checkpoint) start_step = global_step.eval(session=sess) tf.logging.info('Training from step: %d ', start_step) # Save graph.pbtxt. tf.train.write_graph(sess.graph_def, FLAGS.train_dir, FLAGS.model_architecture + '.pbtxt') # Save list of words. with gfile.GFile( os.path.join(FLAGS.train_dir, FLAGS.model_architecture + '_labels.txt'), 'w') as f: f.write('\n'.join(audio_processor.words_list)) ### # model1: fc # model2: conv: ~940k parameters # model3: low_latency_conv: ~~model1 # model4: 750k # Training loop.
############################################# ######## main loop ###### ############################################# training_steps_max = np.sum(training_steps_list) for training_step in xrange(start_step, training_steps_max + 1): # Figure out what the current learning rate is. ####### automatic learning-rate schedule ####### if training_step < 12000 + 1: learning_rate_value = learning_rates_list[0]*0.02**(training_step/12000) else: learning_rate_value = learning_rates_list[0]*0.02 #0.015 12000 training_steps_sum = 0 # for i in range(len(training_steps_list)): # training_steps_sum += training_steps_list[i] # if training_step <= training_steps_sum: # learning_rate_value = learning_rates_list[i] # break # Pull the audio samples we'll use for training. ####### the audio processor loads the data ################################## ##get_data(self, how_many, offset, model_settings, background_frequency, ## background_volume_range, time_shift, mode, sess) ######################################################################## train_fingerprints, train_ground_truth = audio_processor.get_data_my( FLAGS.batch_size, 0, model_settings ,'training') #mid = np.abs(np.max(train_fingerprints) + np.min(train_fingerprints)) / 2 #half = np.max(train_fingerprints) - np.min(train_fingerprints) #train_fingerprints = ((train_fingerprints + mid) / half * 255).astype(int) train_fingerprints_mix, train_ground_truth_mix = mixup_data(train_fingerprints, train_ground_truth, 1) train_fingerprints = np.append(train_fingerprints, train_fingerprints_mix, axis=0) train_ground_truth = np.append(train_ground_truth, train_ground_truth_mix, axis=0) random_index = list(np.arange(FLAGS.batch_size*2)) np.random.shuffle(random_index) train_fingerprints = train_fingerprints[random_index, :] train_ground_truth = train_ground_truth[random_index, :] train_summary, train_accuracy, cross_entropy_value, _, _ = sess.run( [ merged_summaries, evaluation_step, cross_entropy_mean, train_step, increment_global_step ], feed_dict={ fingerprint_input: train_fingerprints, ground_truth_input: train_ground_truth, learning_rate_input: learning_rate_value, istrain:True }) train_writer.add_summary(train_summary, training_step) tf.logging.info('Step #%d: rate %f, accuracy %.1f%%, cross entropy %f' % (training_step, learning_rate_value, train_accuracy * 100, cross_entropy_value)) is_last_step = (training_step == training_steps_max) if (training_step % FLAGS.eval_step_interval) == 0 or is_last_step: ############################################# ######## recompute accuracy and confusion matrix on the test set ###### set_size = audio_processor.set_size('testing') tf.logging.info('set_size=%d', set_size) test_fingerprints, test_ground_truth = audio_processor.get_data_my( -1, 0, model_settings,'testing') #mid = np.abs(np.max(test_fingerprints) + np.min(test_fingerprints)) / 2 #half = np.max(test_fingerprints) - np.min(test_fingerprints) #test_fingerprints = ((test_fingerprints + mid) / half * 255).astype(int) final_summary,test_accuracy, conf_matrix = sess.run( [test_summaries,evaluation_step, confusion_matrix], feed_dict={ fingerprint_input: test_fingerprints, ground_truth_input: test_ground_truth, istrain : False }) final_summary_istrain,test_accuracy_istrain= sess.run( [test_summaries_istrain,evaluation_step], feed_dict={ fingerprint_input: test_fingerprints, ground_truth_input: test_ground_truth, istrain : True }) if test_accuracy > best_acc: best_acc = test_accuracy best_step = training_step if test_accuracy_istrain > best_acc_istrain: best_acc_istrain = test_accuracy_istrain best_step_istrain = training_step test_writer.add_summary(final_summary,
training_step) test_istrain_writer.add_summary(final_summary_istrain, training_step) tf.logging.info('Confusion Matrix:\n %s' % (conf_matrix)) tf.logging.info('test accuracy = %.1f%% (N=%d)' % (test_accuracy * 100,6882)) tf.logging.info('test_istrain accuracy = %.1f%% (N=%d)' % (test_accuracy_istrain * 100,6882)) tf.logging.info('Best test accuracy before now = %.1f%% (N=%d)' % (best_acc * 100,6882) + ' at step of ' + str(best_step)) tf.logging.info('Best test_istrain accuracy before now = %.1f%% (N=%d)' % (best_acc_istrain * 100,6882) + ' at step of ' + str(best_step_istrain)) # Save the model checkpoint periodically. if (training_step % FLAGS.save_step_interval == 0 or training_step == training_steps_max): checkpoint_path = os.path.join(FLAGS.train_dir + '/'+FLAGS.model_architecture, FLAGS.model_architecture + '.ckpt') tf.logging.info('Saving to "%s-%d"', checkpoint_path, training_step) saver.save(sess, checkpoint_path, global_step=training_step) print_line = 'Best test accuracy before now = %.1f%% (N=%d)' % (best_acc * 100,6882) + ' at step of ' + str(best_step) + '\n' + \ 'Best test_istrain accuracy before now = %.1f%% (N=%d)' % (best_acc_istrain * 100,6882) + ' at step of ' + str(best_step_istrain) if training_step == training_steps_max: with open(FLAGS.train_dir + '/' +FLAGS.model_architecture+ '/details.txt', 'w') as f: f.write(print_line)
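# Alternative sketch: instead of running a separate increment_global_step op
# on every batch, a TF 1.x optimizer can advance the step itself via the
# global_step argument of minimize():
global_step = tf.train.get_or_create_global_step()
train_step = tf.train.AdamOptimizer(learning_rate_input).minimize(
    cross_entropy_mean, global_step=global_step)  # increments on each run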
def __init__(self, g, K, T, seed=None, Var_bds=None): """ Define symbolic BFE and auxiliary objective expression to be optimized by tensorflow, given a factor graph. We'll use the one default tensorflow computation graph; to make sure we don't redefine it, it will be cleared/reset every time a new instance of OneShot is created. :param g: a grounded graph corresponding to a plain old PGM; its factors must have .log_potential_fun callable on tf tensors :param K: num mixture comps :param T: num quad points :param seed: :param Var_bds: [lb, ub] on the variance param of Gaussian rvs """ # convert potentials to log_potential_funs (b/c typically the caller only sets potentials instead of log potentials) # utils.set_log_potential_funs(g.factors_list) assert all([callable(f.log_potential_fun) for f in g.factors ]), 'factors must have valid log_potential_fun' # group factors together whose log potential functions have the same call signatures factors_with_unique_nb_domain_types, unique_nb_domain_types = \ utils.get_unique_subsets(g.factors_list, lambda f: f.nb_domain_types) print('number of unique factor domain types =', len(unique_nb_domain_types)) print(unique_nb_domain_types) g.init_rv_indices() # will create attributes like Vc, Vc_idx, etc. # g.init_nb() # caller should have always run this (or done sth similar) to ensure g is well defined! tf.reset_default_graph() # clear existing if seed is not None: # note that a seed set prior to tf.reset_default_graph will be invalidated tf.set_random_seed( seed) # thus we have to reseed after reset_default_graph zeros_K = tf.zeros(K, dtype=dtype) tau = tf.Variable(zeros_K, trainable=True, name='tau') # mixture weights logits # tau = tf.Variable(tf.random_normal([K], dtype=dtype), trainable=True, name='tau') # mixture weights logits w = tf.nn.softmax(tau, name='w') # mixture weights fix_mix_op = tf.assign( tau, zeros_K) # op that resets mixing weights to uniform bfe = aux_obj = 0 if g.Nd > 0: common_dstates = set(rv.dstates for rv in g.Vd) if len(common_dstates) == 1: common_dstates = common_dstates.pop() else: common_dstates = -1 if common_dstates > 0: # all discrete rvs have the same number of states # Rho = tf.Variable(tf.zeros([g.Nd, K, common_dstates], dtype=dtype), trainable=True, # name='Rho') # dnode categorical prob logits Rho = tf.Variable(tf.random_normal([g.Nd, K, common_dstates], dtype=dtype), trainable=True, name='Rho') # dnode categorical prob logits Pi = tf.nn.softmax(Rho, name='Pi') else: # general case when each dnode can have different num states # Rho = [tf.Variable(tf.zeros([K, rv.dstates], dtype=dtype), trainable=True, name='Rho_%d' % i) for # (i, rv) in enumerate(g.Vd)] # dnode categorical prob logits Rho = [ tf.Variable(tf.random_normal([K, rv.dstates], dtype=dtype), trainable=True, name='Rho_%d' % i) for (i, rv) in enumerate(g.Vd) ] # dnode categorical prob logits Pi = [ tf.nn.softmax(rho, name='Pi_%d' % i) for (i, rho) in enumerate(Rho) ] # convert to probs # assign symbolic belief vars to rvs for rv in g.Vd: i = g.Vd_idx[rv] # ith disc node rv.belief_params_ = {'pi': Pi[i]} # K x dstates[i] matrix # get discrete nodes' contributions to the objective if common_dstates > 0: # all discrete rvs have the same number of states sharing_counts = [ rv.sharing_count for rv in g.Vd ] # for lifting/param sharing; 1s if no lifting delta_bfe, delta_aux_obj = drvs_bfe_obj( rvs=g.Vd, w=w, Pi=Pi, rvs_counts=sharing_counts) bfe += delta_bfe aux_obj += delta_aux_obj else: for rv in g.Vd: delta_bfe, delta_aux_obj = drv_bfe_obj(rv, w)
sharing_count = rv.sharing_count bfe += sharing_count * delta_bfe aux_obj += sharing_count * delta_aux_obj clip_op = tf.no_op() # will be replaced with real clip op if Nc > 0 if g.Nc > 0: # assuming Gaussian if Var_bds is None: Var_bds = [5e-3, 10] # currently shared by all cnodes Mu_bds = np.empty([2, g.Nc], dtype='float') for n, rv in enumerate(g.Vc): Mu_bds[:, n] = rv.values[0], rv.values[1] # lb, ub Mu_bds = Mu_bds[:, :, None] + \ np.zeros([2, g.Nc, K], dtype='float') # Mu_bds[0], Mu_bds[1] give lb, ub for Mu; same for all K Mu = np.random.uniform(low=Mu_bds[0], high=Mu_bds[1], size=[g.Nc, K]) # init numerical value if init_grid: # try spreading initial means evenly on a grid within the Mu_bds box set I = int(K**( 1 / g.Nc )) # number of points per dimension; need to have I^{Nc} <= K slices = [] for n, rv in enumerate(g.Vc): lb, ub = rv.values[0], rv.values[1] step = (ub - lb) / (I + 1) slices.append(slice(lb + step, ub, step)) # no boundary points included grid = np.mgrid[slices] # Nc x I x I x .. x I (Nc many Is) num_grid_points = int(I**g.Nc) grid = np.reshape(grid, [g.Nc, num_grid_points]) grid += init_grid_noise * np.random.randn(*grid.shape) Mu[:, : num_grid_points] = grid # the rest have already been initialized Mu = tf.Variable(Mu, dtype=dtype, trainable=True, name='Mu') # optimize the log of Var (sigma squared), for numeric stability lVar_bds = np.log(Var_bds) # lVar = tf.Variable(np.log(np.random.uniform(low=Var_bds[0], high=Var_bds[1], size=[g.Nc, K])), # dtype=dtype, trainable=True, name='lVar') lVar = tf.Variable(np.random.uniform(low=lVar_bds[0], high=lVar_bds[1], size=[g.Nc, K]), dtype=dtype, trainable=True, name='lVar') Var = tf.exp(lVar) clip_op = tf.group( tf.assign(Mu, tf.clip_by_value(Mu, *Mu_bds)), tf.assign(lVar, tf.clip_by_value(lVar, *lVar_bds))) for rv in g.Vc: i = g.Vc_idx[rv] # ith cont node rv.belief_params_ = { 'mu': Mu[i], 'var': Var[i], 'var_inv': 1 / Var[i], 'mu_K1': tf.reshape(Mu[i], [K, 1]), 'var_K1': tf.reshape(Var[i], [K, 1]), 'var_inv_K1': tf.reshape(1 / Var[i], [K, 1]) } # get continuous nodes' contribution to the objectives (assuming all Gaussian for now) sharing_counts = [rv.sharing_count for rv in g.Vc ] # for lifting/param sharing; 1s if no lifting delta_bfe, delta_aux_obj = crvs_bfe_obj(rvs=g.Vc, T=T, w=w, Mu=Mu, Var=Var, rvs_counts=sharing_counts) bfe += delta_bfe aux_obj += delta_aux_obj for factors in factors_with_unique_nb_domain_types: factor = factors[0] if factor.domain_type == 'd': delta_bfe, delta_aux_obj = dfactors_bfe_obj(factors, w) else: assert factor.domain_type in ('c', 'h') delta_bfe, delta_aux_obj = hfactors_bfe_obj(factors, T, w, dtype=dtype) bfe += delta_bfe aux_obj += delta_aux_obj self.__dict__.update(**locals())
def update_epoch(self, epoch, sess): sess.run(self.assign_handler) sess.run(tf.assign(self.now_epoch, int(epoch))) # NB: this builds a fresh assign op on every call, so the graph grows over training
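# Sketch of the usual fix for the op-per-call pattern above: build one assign
# op with a placeholder when the graph is constructed, then just feed values.
# All names below (now_epoch, epoch_ph, set_epoch_op) are illustrative.
import tensorflow as tf

now_epoch = tf.Variable(0, trainable=False, dtype=tf.int32, name='now_epoch')
epoch_ph = tf.placeholder(tf.int32, shape=[], name='epoch_ph')
set_epoch_op = tf.assign(now_epoch, epoch_ph)  # built exactly once

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(3):
        sess.run(set_epoch_op, feed_dict={epoch_ph: epoch})  # no graph growth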
def no_return_assign(self, ref, value): tf.assign(ref, value) return 0 # bug: the assign op is created but never run or returned, so ref is never actually updated
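# Minimal working contrast (sketch): the assign op has to reach Session.run
# before the variable actually changes.
import tensorflow as tf

ref = tf.Variable(0.0)
assign_op = tf.assign(ref, 3.0)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(ref))   # 0.0 -- the op exists but has not run
    sess.run(assign_op)    # the assignment happens here
    print(sess.run(ref))   # 3.0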
def main(_): # Pick up any one-off hyper-parameters. hparams = path_model.PathBasedModel.default_hparams() # Set the number of classes classes_filename = os.path.join( FLAGS.dataset_dir, FLAGS.dataset, 'classes.txt') with open(classes_filename) as f_in: classes = f_in.read().splitlines() hparams.num_classes = len(classes) print('Model will predict into %d classes' % hparams.num_classes) # Get the datasets train_set, val_set, test_set = ( os.path.join( FLAGS.dataset_dir, FLAGS.dataset, FLAGS.corpus, filename + '.tfrecs.gz') for filename in ['train', 'val', 'test']) print('Running with hyper-parameters: {}'.format(hparams)) # Load the instances print('Loading instances...') opts = tf.python_io.TFRecordOptions( compression_type=tf.python_io.TFRecordCompressionType.GZIP) train_instances = list(tf.python_io.tf_record_iterator(train_set, opts)) val_instances = list(tf.python_io.tf_record_iterator(val_set, opts)) test_instances = list(tf.python_io.tf_record_iterator(test_set, opts)) # Load the word embeddings print('Loading word embeddings...') lemma_embeddings = lexnet_common.load_word_embeddings( FLAGS.embeddings_base_path, hparams.lemma_embeddings_file) # Define the graph and the model with tf.Graph().as_default(): with tf.variable_scope('lexnet'): options = tf.python_io.TFRecordOptions( compression_type=tf.python_io.TFRecordCompressionType.GZIP) reader = tf.TFRecordReader(options=options) _, train_instance = reader.read( tf.train.string_input_producer([train_set])) shuffled_train_instance = tf.train.shuffle_batch( [train_instance], batch_size=1, num_threads=1, capacity=len(train_instances), min_after_dequeue=100, )[0] train_model = path_model.PathBasedModel( hparams, lemma_embeddings, shuffled_train_instance) with tf.variable_scope('lexnet', reuse=True): val_instance = tf.placeholder(dtype=tf.string) val_model = path_model.PathBasedModel( hparams, lemma_embeddings, val_instance) # Initialize a session and start training logdir = ( '{logdir}/results/{dataset}/path/{corpus}/supervisor.logdir'.format( logdir=FLAGS.logdir, dataset=FLAGS.dataset, corpus=FLAGS.corpus)) best_model_saver = tf.train.Saver() f1_t = tf.placeholder(tf.float32) best_f1_t = tf.Variable(0.0, trainable=False, name='best_f1') assign_best_f1_op = tf.assign(best_f1_t, f1_t) supervisor = tf.train.Supervisor( logdir=logdir, global_step=train_model.global_step) with supervisor.managed_session() as session: # Load the labels print('Loading labels...') val_labels = train_model.load_labels(session, val_instances) save_path = '{logdir}/results/{dataset}/path/{corpus}/'.format( logdir=FLAGS.logdir, dataset=FLAGS.dataset, corpus=FLAGS.corpus) # Train the model print('Training the model...') while True: step = session.run(train_model.global_step) epoch = (step + len(train_instances) - 1) // len(train_instances) if epoch > hparams.num_epochs: break print('Starting epoch %d (step %d)...' 
% (1 + epoch, step)) epoch_loss = train_model.run_one_epoch(session, len(train_instances)) best_f1 = session.run(best_f1_t) f1 = epoch_completed(val_model, session, epoch, epoch_loss, val_instances, val_labels, best_model_saver, save_path, best_f1) if f1 > best_f1: session.run(assign_best_f1_op, {f1_t: f1}) if f1 < best_f1 - 0.08: tf.logging.info('Stopping training after %d epochs.\n' % epoch) break # Print the best performance on the validation set best_f1 = session.run(best_f1_t) print('Best performance on the validation set: F1=%.3f' % best_f1) # Save the path embeddings print('Computing the path embeddings...') instances = train_instances + val_instances + test_instances path_index, path_vectors = path_model.compute_path_embeddings( val_model, session, instances) path_emb_dir = '{dir}/path_embeddings/{dataset}/{corpus}/'.format( dir=FLAGS.embeddings_base_path, dataset=FLAGS.dataset, corpus=FLAGS.corpus) if not os.path.exists(path_emb_dir): os.makedirs(path_emb_dir) path_model.save_path_embeddings( val_model, path_vectors, path_index, path_emb_dir)
additional_inputs_tf = tf.placeholder(tf.float32, [1]) batch_tf, input_dict_func = helper.tf_batch_and_input_dict(batch, additional_inputs_tf) train_outs_dict, test_outs_dict = model.inference(batch_tf, additional_inputs_tf) generative_dict = model.generative_model(batch_tf) inference_obs_dist = model.obs_dist discriminator_vars = [v for v in tf.trainable_variables() if 'Discriminator' in v.name] generator_vars = [v for v in tf.trainable_variables() if 'Discriminator' not in v.name] # Weight clipping discriminator_vars_flat_concat = tf.concat([tf.reshape(e, [-1]) for e in discriminator_vars], axis=0) max_abs_discriminator_vars = tf.reduce_max(tf.abs(discriminator_vars_flat_concat)) clip_op_list = [] for e in discriminator_vars: clip_op_list.append(tf.assign(e, tf.clip_by_value(e, -0.01, 0.01))) if global_args.optimizer_class == 'RmsProp': train_generator_step_tf = tf.train.RMSPropOptimizer(learning_rate=global_args.learning_rate, momentum=0.9).minimize(train_outs_dict['generator_cost'], var_list=generator_vars, global_step=global_step) train_discriminator_step_tf = tf.train.RMSPropOptimizer(learning_rate=global_args.learning_rate, momentum=0.9).minimize(train_outs_dict['discriminator_cost'], var_list=discriminator_vars, global_step=global_step) elif global_args.optimizer_class == 'Adam': train_generator_step_tf = tf.train.AdamOptimizer(learning_rate=0.0001, beta1=0.5, beta2=0.999, epsilon=1e-08).minimize(train_outs_dict['generator_cost'], var_list=generator_vars, global_step=global_step) train_discriminator_step_tf = tf.train.AdamOptimizer(learning_rate=0.0001, beta1=0.5, beta2=0.999, epsilon=1e-08).minimize(train_outs_dict['discriminator_cost'], var_list=discriminator_vars, global_step=global_step) helper.variable_summaries(train_outs_dict['generator_cost'], '/generator_cost') helper.variable_summaries(train_outs_dict['discriminator_cost'], '/discriminator_cost') init = tf.global_variables_initializer()
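# The clip_op_list built above only takes effect when run; it is typically
# fused into one op and executed after each discriminator update. The
# WGAN-style schedule below is an assumption about the surrounding loop.
clip_discriminator_op = tf.group(*clip_op_list)
# inside the training loop:
#   sess.run(train_discriminator_step_tf, feed_dict=...)
#   sess.run(clip_discriminator_op)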
def inference_mem(images, cams, depth_num, depth_start, depth_interval, is_master_gpu=True): """ infer depth image from multi-view images and cameras """ # dynamic gpu params depth_end = depth_start + (tf.cast(depth_num, tf.float32) - 1) * depth_interval feature_c = 32 feature_h = FLAGS.max_h // 4 feature_w = FLAGS.max_w // 4 # reference image ref_image = tf.squeeze(tf.slice(images, [0, 0, 0, 0, 0], [-1, 1, -1, -1, 3]), axis=1) ref_cam = tf.squeeze(tf.slice(cams, [0, 0, 0, 0, 0], [-1, 1, 2, 4, 4]), axis=1) # image feature extraction if is_master_gpu: ref_tower = UNetDS2GN({'data': ref_image}, is_training=True, reuse=False) else: ref_tower = UNetDS2GN({'data': ref_image}, is_training=True, reuse=True) ref_feature = ref_tower.get_output() ref_feature2 = tf.square(ref_feature) view_features = [] for view in range(1, FLAGS.view_num): view_image = tf.squeeze(tf.slice(images, [0, view, 0, 0, 0], [-1, 1, -1, -1, -1]), axis=1) view_tower = UNetDS2GN({'data': view_image}, is_training=True, reuse=True) view_features.append(view_tower.get_output()) view_features = tf.stack(view_features, axis=0) # get all homographies view_homographies = [] for view in range(1, FLAGS.view_num): view_cam = tf.squeeze(tf.slice(cams, [0, view, 0, 0, 0], [-1, 1, 2, 4, 4]), axis=1) homographies = get_homographies(ref_cam, view_cam, depth_num=depth_num, depth_start=depth_start, depth_interval=depth_interval) view_homographies.append(homographies) view_homographies = tf.stack(view_homographies, axis=0) # build cost volume by differentiable homography with tf.name_scope('cost_volume_homography'): depth_costs = [] for d in range(depth_num): # compute cost (standard deviation feature) ave_feature = tf.Variable( tf.zeros([FLAGS.batch_size, feature_h, feature_w, feature_c]), name='ave', trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES]) ave_feature2 = tf.Variable( tf.zeros([FLAGS.batch_size, feature_h, feature_w, feature_c]), name='ave2', trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES]) ave_feature = tf.assign(ave_feature, ref_feature) ave_feature2 = tf.assign(ave_feature2, ref_feature2) def body(view, ave_feature, ave_feature2): """Loop body.""" homography = tf.slice(view_homographies[view], begin=[0, d, 0, 0], size=[-1, 1, 3, 3]) homography = tf.squeeze(homography, axis=1) # warped_view_feature = homography_warping(view_features[view], homography) warped_view_feature = tf_transform_homography( view_features[view], homography) ave_feature = tf.assign_add(ave_feature, warped_view_feature) ave_feature2 = tf.assign_add(ave_feature2, tf.square(warped_view_feature)) view = tf.add(view, 1) return view, ave_feature, ave_feature2 view = tf.constant(0) cond = lambda view, *_: tf.less(view, FLAGS.view_num - 1) _, ave_feature, ave_feature2 = tf.while_loop( cond, body, [view, ave_feature, ave_feature2], back_prop=False, parallel_iterations=1) ave_feature = tf.assign( ave_feature, tf.square(ave_feature) / (FLAGS.view_num * FLAGS.view_num)) ave_feature2 = tf.assign( ave_feature2, ave_feature2 / FLAGS.view_num - ave_feature) depth_costs.append(ave_feature2) cost_volume = tf.stack(depth_costs, axis=1) # filtered cost volume, size of (B, D, H, W, 1) if is_master_gpu: filtered_cost_volume_tower = RegNetUS0({'data': cost_volume}, is_training=True, reuse=False) else: filtered_cost_volume_tower = RegNetUS0({'data': cost_volume}, is_training=True, reuse=True) filtered_cost_volume = tf.squeeze(filtered_cost_volume_tower.get_output(), axis=-1) # depth map by softArgmin with tf.name_scope('soft_arg_min'): # probability volume by
soft max probability_volume = tf.nn.softmax(tf.scalar_mul( -1, filtered_cost_volume), axis=1, name='prob_volume') # depth image by soft argmin volume_shape = tf.shape(probability_volume) soft_2d = [] for i in range(FLAGS.batch_size): soft_1d = tf.linspace(depth_start[i], depth_end[i], tf.cast(depth_num, tf.int32)) soft_2d.append(soft_1d) soft_2d = tf.reshape(tf.stack(soft_2d, axis=0), [volume_shape[0], volume_shape[1], 1, 1]) soft_4d = tf.tile(soft_2d, [1, 1, volume_shape[2], volume_shape[3]]) estimated_depth_map = tf.reduce_sum(soft_4d * probability_volume, axis=1) estimated_depth_map = tf.expand_dims(estimated_depth_map, axis=3) # probability map prob_map = get_propability_map(probability_volume, estimated_depth_map, depth_start, depth_interval) # return filtered_depth_map, return estimated_depth_map, prob_map
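# The soft-argmin step above is just an expectation of candidate depths under
# the softmax of negated costs; a self-contained toy sketch of the same idea
# (values and shapes are made up):
import tensorflow as tf

costs = tf.constant([[4.0, 1.0, 0.5, 2.0]])       # [batch, depth_num]
depths = tf.constant([[10.0, 20.0, 30.0, 40.0]])  # candidate depth values
probs = tf.nn.softmax(-costs, axis=1)             # low cost -> high probability
expected_depth = tf.reduce_sum(probs * depths, axis=1)

with tf.Session() as sess:
    print(sess.run(expected_depth))  # weighted toward depth 30.0 (lowest cost)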
def __init__(self, config, batch, word_mat=None, filter_sizes=None, embedding_size=None, num_filters=None, trainable=True, l2_reg_lambda=0.0, keep_prob=0.9, graph=None): # Placeholders for input, output and dropout self.config = config self.graph = graph if graph is not None else tf.Graph() self.trainable = trainable if trainable: self.input_x, self.input_x1, self.ch, self.qh, self.input_y, self.qa_id = batch.get_next() # self.y1 is (64, 3); self.alterh batch size is [batch, 3, alternative_len, chara_len] else: self.input_x, self.input_x1, self.ch, self.qh = batch.get_next() # self.y1 is (64, 3); self.alterh batch size is [batch, 3, alternative_len, chara_len] self.dropout_keep_prob = keep_prob self.global_step = tf.get_variable('global_step', shape=[], dtype=tf.int32, initializer=tf.constant_initializer(0), trainable=False) self.dropout = tf.placeholder_with_default(0.5, (), name="dropout") # Keeping track of l2 regularization loss (optional) l2_loss = tf.constant(0.0) # Embedding layer with tf.device('/cpu:0'), tf.name_scope("embedding"): self.W = tf.get_variable("word_mat", initializer=tf.constant(word_mat, dtype=tf.float32), trainable=True) self.c_mask = tf.cast(self.input_x, tf.bool) # self.c is zero-padded so that all rows share the same length self.q_mask = tf.cast(self.input_x1, tf.bool) if trainable: self.c_maxlen, self.q_maxlen = config.para_limit, config.ques_limit else: self.c_maxlen, self.q_maxlen = config.test_para_limit, config.test_ques_limit self.embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x) self.embedded_chars1 = tf.nn.embedding_lookup(self.W, self.input_x1) self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1) self.embedded_question = tf.expand_dims(self.embedded_chars1, -1) S = optimized_trilinear_for_attention([self.embedded_chars_expanded, self.embedded_question], self.c_maxlen, self.q_maxlen, input_keep_prob=1.0 - self.dropout) print(S) # Create a convolution + maxpool layer for each filter size pooled_outputs = [] for i, filter_size in enumerate(filter_sizes): with tf.name_scope("conv-maxpool-%s" % filter_size): # Convolution Layer filter_shape = [filter_size, embedding_size, 1, num_filters] W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W") b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b") conv = tf.nn.conv2d( self.embedded_chars_expanded, W, strides=[1, 1, 1, 1], padding="VALID", name="conv") # Apply nonlinearity h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu") # Maxpooling over the outputs pooled = tf.nn.max_pool( h, ksize=[1, config.para_limit - filter_size + 1, 1, 1], strides=[1, 1, 1, 1], padding='VALID', name="pool") pooled_outputs.append(pooled) # Combine all the pooled features num_filters_total = num_filters * len(filter_sizes) self.h_pool = tf.concat(pooled_outputs, 3) self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total]) # Add dropout with tf.name_scope("dropout"): self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob) # Final (unnormalized) scores and predictions with tf.name_scope("output"): W = tf.get_variable( "W", shape=[num_filters_total, 3], initializer=tf.contrib.layers.xavier_initializer()) b = tf.Variable(tf.constant(0.1, shape=[3]), name="b") l2_loss += tf.nn.l2_loss(W) l2_loss += tf.nn.l2_loss(b) self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores") self.predictions = tf.argmax(self.scores, 1, name="predictions") # Calculate mean cross-entropy loss with tf.name_scope("loss"): losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores,
labels=self.input_y) self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss # Accuracy with tf.name_scope("accuracy"): correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1)) self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy") if config.decay is not None: self.var_ema = tf.train.ExponentialMovingAverage(config.decay) ema_op = self.var_ema.apply(tf.trainable_variables()) with tf.control_dependencies([ema_op]): self.loss = tf.identity(self.loss) self.assign_vars = [] for var in tf.global_variables(): v = self.var_ema.average(var) if v: self.assign_vars.append(tf.assign(var, v)) self.lr = tf.minimum(config.init_lr, 0.001 / tf.log(999.) * tf.log(tf.cast(self.global_step, tf.float32) + 1)) self.opt = tf.train.AdamOptimizer(learning_rate=self.lr, beta1=0.8, beta2=0.999, epsilon=1e-7) grads = self.opt.compute_gradients(self.loss) gradients, variables = zip(*grads) capped_grads, _ = tf.clip_by_global_norm( gradients, config.grad_clip) self.train_op = self.opt.apply_gradients( zip(capped_grads, variables), global_step=self.global_step) self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=3)
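# Usage note (sketch): the assign_vars list above only takes effect when run
# explicitly, typically right before evaluation so that live weights are
# swapped for their exponential moving averages. `model` and `sess` are
# assumed names:
#
#   sess.run(model.assign_vars)  # copy EMA shadow values into live variables
#   # ... run validation / inference with the averaged weights ...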
[F,F,bias], ] train_out = [ [T], [F], [F], [F], ] W = tf.Variable(tf.random_normal([3,1])) init = tf.global_variables_initializer() sess = tf.Session() sess.run(init) # Creating a step function: def step(x): is_greater = tf.greater(x, 0) float_greater = tf.to_float(is_greater) double = tf.multiply(float_greater, 2) return tf.subtract(double, 1) output = step(tf.matmul(train_in, W)) error = tf.subtract(train_out, output) mse = tf.reduce_mean(tf.square(error)) # Weight reassignment delta = tf.matmul(train_in, error, transpose_a=True) train = tf.assign(W, tf.add(W, delta)) err, target = 1, 0 epoch, max_epoch = 0, 10 while err > target and epoch < max_epoch: epoch += 1 err, _ = sess.run([mse, train]) print(err, epoch)
def inference_winner_take_all(images, cams, depth_num, depth_start, depth_end, is_master_gpu=True, reg_type='GRU', inverse_depth=False): """ infer disparity image from stereo images and cameras """ if not inverse_depth: depth_interval = (depth_end - depth_start) / (tf.cast(depth_num, tf.float32) - 1) # reference image ref_image = tf.squeeze(tf.slice(images, [0, 0, 0, 0, 0], [-1, 1, -1, -1, 3]), axis=1) ref_cam = tf.squeeze(tf.slice(cams, [0, 0, 0, 0, 0], [-1, 1, 2, 4, 4]), axis=1) # image feature extraction if is_master_gpu: ref_tower = UNetDS2GN({'data': ref_image}, is_training=True, reuse=False) else: ref_tower = UNetDS2GN({'data': ref_image}, is_training=True, reuse=True) view_towers = [] for view in range(1, FLAGS.view_num): view_image = tf.squeeze(tf.slice(images, [0, view, 0, 0, 0], [-1, 1, -1, -1, -1]), axis=1) view_tower = UNetDS2GN({'data': view_image}, is_training=True, reuse=True) view_towers.append(view_tower) # get all homographies view_homographies = [] for view in range(1, FLAGS.view_num): view_cam = tf.squeeze(tf.slice(cams, [0, view, 0, 0, 0], [-1, 1, 2, 4, 4]), axis=1) if inverse_depth: homographies = get_homographies_inv_depth(ref_cam, view_cam, depth_num=depth_num, depth_start=depth_start, depth_end=depth_end) else: homographies = get_homographies(ref_cam, view_cam, depth_num=depth_num, depth_start=depth_start, depth_interval=depth_interval) view_homographies.append(homographies) # gru unit gru1_filters = 16 gru2_filters = 4 gru3_filters = 2 feature_shape = [FLAGS.batch_size, FLAGS.max_h // 4, FLAGS.max_w // 4, 32] gru_input_shape = [feature_shape[1], feature_shape[2]] state1 = tf.zeros( [FLAGS.batch_size, feature_shape[1], feature_shape[2], gru1_filters]) state2 = tf.zeros( [FLAGS.batch_size, feature_shape[1], feature_shape[2], gru2_filters]) state3 = tf.zeros( [FLAGS.batch_size, feature_shape[1], feature_shape[2], gru3_filters]) conv_gru1 = ConvGRUCell(shape=gru_input_shape, kernel=[3, 3], filters=gru1_filters) conv_gru2 = ConvGRUCell(shape=gru_input_shape, kernel=[3, 3], filters=gru2_filters) conv_gru3 = ConvGRUCell(shape=gru_input_shape, kernel=[3, 3], filters=gru3_filters) # initialize variables exp_sum = tf.Variable(tf.zeros( [FLAGS.batch_size, feature_shape[1], feature_shape[2], 1]), name='exp_sum', trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES]) depth_image = tf.Variable(tf.zeros( [FLAGS.batch_size, feature_shape[1], feature_shape[2], 1]), name='depth_image', trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES]) max_prob_image = tf.Variable(tf.zeros( [FLAGS.batch_size, feature_shape[1], feature_shape[2], 1]), name='max_prob_image', trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES]) init_map = tf.zeros( [FLAGS.batch_size, feature_shape[1], feature_shape[2], 1]) # define winner take all loop def body(depth_index, state1, state2, state3, depth_image, max_prob_image, exp_sum, incre): """Loop body.""" # calculate cost ave_feature = ref_tower.get_output() ave_feature2 = tf.square(ref_tower.get_output()) for view in range(0, FLAGS.view_num - 1): homographies = view_homographies[view] homographies = tf.transpose(homographies, perm=[1, 0, 2, 3]) homography = homographies[depth_index] # warped_view_feature = homography_warping(view_towers[view].get_output(), homography) warped_view_feature = tf_transform_homography( view_towers[view].get_output(), homography) ave_feature = ave_feature + warped_view_feature ave_feature2 = ave_feature2 + tf.square(warped_view_feature) ave_feature = ave_feature / FLAGS.view_num ave_feature2 = ave_feature2 /
FLAGS.view_num cost = ave_feature2 - tf.square(ave_feature) cost.set_shape( [FLAGS.batch_size, feature_shape[1], feature_shape[2], 32]) # gru reg_cost1, state1 = conv_gru1(-cost, state1, scope='conv_gru1') reg_cost2, state2 = conv_gru2(reg_cost1, state2, scope='conv_gru2') reg_cost3, state3 = conv_gru3(reg_cost2, state3, scope='conv_gru3') reg_cost = tf.layers.conv2d(reg_cost3, 1, 3, padding='same', reuse=tf.AUTO_REUSE, name='prob_conv') prob = tf.exp(reg_cost) # index d_idx = tf.cast(depth_index, tf.float32) if inverse_depth: inv_depth_start = tf.div(1.0, depth_start) inv_depth_end = tf.div(1.0, depth_end) inv_interval = (inv_depth_start - inv_depth_end) / ( tf.cast(depth_num, 'float32') - 1) inv_depth = inv_depth_start - d_idx * inv_interval depth = tf.div(1.0, inv_depth) else: depth = depth_start + d_idx * depth_interval temp_depth_image = tf.reshape(depth, [FLAGS.batch_size, 1, 1, 1]) temp_depth_image = tf.tile(temp_depth_image, [1, feature_shape[1], feature_shape[2], 1]) # update the best update_flag_image = tf.cast(tf.less(max_prob_image, prob), dtype='float32') new_max_prob_image = update_flag_image * prob + ( 1 - update_flag_image) * max_prob_image new_depth_image = update_flag_image * temp_depth_image + ( 1 - update_flag_image) * depth_image max_prob_image = tf.assign(max_prob_image, new_max_prob_image) depth_image = tf.assign(depth_image, new_depth_image) # update counter exp_sum = tf.assign_add(exp_sum, prob) depth_index = tf.add(depth_index, incre) return depth_index, state1, state2, state3, depth_image, max_prob_image, exp_sum, incre # run forward loop exp_sum = tf.assign(exp_sum, init_map) depth_image = tf.assign(depth_image, init_map) max_prob_image = tf.assign(max_prob_image, init_map) depth_index = tf.constant(0) incre = tf.constant(1) cond = lambda depth_index, *_: tf.less(depth_index, depth_num) _, state1, state2, state3, depth_image, max_prob_image, exp_sum, incre = tf.while_loop( cond, body, [ depth_index, state1, state2, state3, depth_image, max_prob_image, exp_sum, incre ], back_prop=False, parallel_iterations=1) # get output forward_exp_sum = exp_sum + 1e-7 forward_depth_map = depth_image return forward_depth_map, max_prob_image / forward_exp_sum
def learn( env, policy_fn, *, timesteps_per_actorbatch, # timesteps per actor per update clip_param, entcoeff, # clipping parameter epsilon, entropy coeff optim_epochs, optim_stepsize, optim_batchsize, # optimization hypers gamma, lam, # advantage estimation max_timesteps=0, max_episodes=0, max_iters=0, max_seconds=0, # time constraint callback=None, # you can do anything in the callback, since it takes locals(), globals() adam_epsilon=1e-5, schedule='constant', # annealing for stepsize parameters (epsilon and adam) gradients=True, hessians=False, model_path='model', output_prefix, sim): #Directory setup: model_dir = 'models/' if not os.path.exists(model_dir): os.makedirs(model_dir) # Setup losses and stuff # ---------------------------------------- ob_space = env.observation_space ac_space = env.action_space pi = policy_fn("pi", ob_space, ac_space) # Construct network for new policy oldpi = policy_fn("oldpi", ob_space, ac_space) # Network for old policy atarg = tf.placeholder( dtype=tf.float32, shape=[None]) # Target advantage function (if applicable) ret = tf.placeholder(dtype=tf.float32, shape=[None]) # Empirical return lrmult = tf.placeholder( name='lrmult', dtype=tf.float32, shape=[]) # learning rate multiplier, updated with schedule clip_param = clip_param * lrmult # Annealed clipping parameter epsilon ob = U.get_placeholder_cached(name="ob") ac = pi.pdtype.sample_placeholder([None]) kloldnew = oldpi.pd.kl(pi.pd) ent = pi.pd.entropy() meankl = tf.reduce_mean(kloldnew) meanent = tf.reduce_mean(ent) pol_entpen = (-entcoeff) * meanent ratio = tf.exp(pi.pd.logp(ac) - oldpi.pd.logp(ac)) # pnew / pold surr1 = ratio * atarg # surrogate from conservative policy iteration surr2 = tf.clip_by_value(ratio, 1.0 - clip_param, 1.0 + clip_param) * atarg # pol_surr = -tf.reduce_mean(tf.minimum( surr1, surr2)) # PPO's pessimistic surrogate (L^CLIP) vf_loss = tf.reduce_mean(tf.square(pi.vpred - ret)) total_loss = pol_surr + pol_entpen + vf_loss losses = [pol_surr, pol_entpen, vf_loss, meankl, meanent] loss_names = ["pol_surr", "pol_entpen", "vf_loss", "kl", "ent"] var_list = pi.get_trainable_variables() lossandgradandhessian = U.function( [ob, ac, atarg, ret, lrmult], losses + [U.flatgrad(total_loss, var_list), U.flathess(total_loss, var_list)]) lossandgrad = U.function([ob, ac, atarg, ret, lrmult], losses + [U.flatgrad(total_loss, var_list)]) adam = MpiAdam(var_list, epsilon=adam_epsilon) assign_old_eq_new = U.function( [], [], updates=[ tf.assign(oldv, newv) for (oldv, newv) in zipsame(oldpi.get_variables(), pi.get_variables()) ]) compute_losses = U.function([ob, ac, atarg, ret, lrmult], losses) U.initialize() # Set the logs writer to the folder /tmp/tensorflow_logs tf.summary.FileWriter( '/home/aespielberg/ResearchCode/baselines/baselines/tmp/', graph_def=tf.get_default_session().graph_def) adam.sync() # Prepare for rollouts # ---------------------------------------- seg_gen = traj_segment_generator(pi, env, timesteps_per_actorbatch, stochastic=True) episodes_so_far = 0 timesteps_so_far = 0 iters_so_far = 0 tstart = time.time() lenbuffer = deque(maxlen=100) # rolling buffer for episode lengths rewbuffer = deque(maxlen=100) # rolling buffer for episode rewards assert sum( [max_iters > 0, max_timesteps > 0, max_episodes > 0, max_seconds > 0]) == 1, "Only one time constraint permitted" gradient_indices = get_gradient_indices(pi) while True: if callback: callback(locals(), globals()) #ANDYTODO: add new break condition ''' try: print(np.std(rewbuffer) / np.mean(rewbuffer)) print(rewbuffer) if
np.std(rewbuffer) / np.mean(rewbuffer) < 0.01: #TODO: input argument break except: pass #No big ''' if max_timesteps and timesteps_so_far >= max_timesteps: break elif max_episodes and episodes_so_far >= max_episodes: break elif max_iters and iters_so_far >= max_iters: break elif max_seconds and time.time() - tstart >= max_seconds: break if schedule == 'constant': cur_lrmult = 1.0 elif schedule == 'linear': cur_lrmult = max(1.0 - float(timesteps_so_far) / max_timesteps, 0) else: raise NotImplementedError logger.log("********** Iteration %i ************" % iters_so_far) seg = seg_gen.__next__() add_vtarg_and_adv(seg, gamma, lam) # ob, ac, atarg, ret, td1ret = map(np.concatenate, (obs, acs, atargs, rets, td1rets)) ob, ac, atarg, tdlamret = seg["ob"], seg["ac"], seg["adv"], seg[ "tdlamret"] vpredbefore = seg["vpred"] # predicted value function before update atarg = (atarg - atarg.mean() ) / atarg.std() # standardized advantage function estimate d = Dataset(dict(ob=ob, ac=ac, atarg=atarg, vtarg=tdlamret), shuffle=not pi.recurrent) optim_batchsize = optim_batchsize or ob.shape[0] if hasattr(pi, "ob_rms"): pi.ob_rms.update(ob) # update running mean/std for policy assign_old_eq_new() # set old parameter values to new parameter values logger.log("Optimizing...") logger.log(fmt_row(13, loss_names)) # Here we do a bunch of optimization epochs over the data for _ in range(optim_epochs): gradient_set = [] losses = [ ] # list of tuples, each of which gives the loss for a minibatch for batch in d.iterate_once(optim_batchsize): *newlosses, g = lossandgrad(batch["ob"], batch["ac"], batch["atarg"], batch["vtarg"], cur_lrmult) gradient_set.append(g) if not sim: adam.update(g, optim_stepsize * cur_lrmult) losses.append(newlosses) logger.log(fmt_row(13, np.mean(losses, axis=0))) print('objective is') print(np.sum(np.mean(losses, axis=0)[0:3])) print(get_model_vars(pi)) if sim: print('return routine') return_routine(pi, d, batch, output_prefix, losses, cur_lrmult, lossandgradandhessian, gradients, hessians, gradient_set) return pi if np.mean(list( map(np.linalg.norm, gradient_set))) < 1e-4: #TODO: make this a variable #TODO: abstract all this away somehow (scope) print('minimized!') return_routine(pi, d, batch, output_prefix, losses, cur_lrmult, lossandgradandhessian, gradients, hessians, gradient_set) return pi print(np.mean(list(map(np.linalg.norm, np.array(gradient_set))))) logger.log("Evaluating losses...") losses = [] for batch in d.iterate_once(optim_batchsize): newlosses = compute_losses(batch["ob"], batch["ac"], batch["atarg"], batch["vtarg"], cur_lrmult) losses.append(newlosses) meanlosses, _, _ = mpi_moments(losses, axis=0) logger.log(fmt_row(13, meanlosses)) for (lossval, name) in zipsame(meanlosses, loss_names): logger.record_tabular("loss_" + name, lossval) logger.record_tabular("ev_tdlam_before", explained_variance(vpredbefore, tdlamret)) lrlocal = (seg["ep_lens"], seg["ep_rets"]) # local values listoflrpairs = MPI.COMM_WORLD.allgather(lrlocal) # list of tuples lens, rews = map(flatten_lists, zip(*listoflrpairs)) lenbuffer.extend(lens) rewbuffer.extend(rews) logger.record_tabular("EpLenMean", np.mean(lenbuffer)) logger.record_tabular("EpRewMean", np.mean(rewbuffer)) logger.record_tabular("EpThisIter", len(lens)) episodes_so_far += len(lens) timesteps_so_far += sum(lens) iters_so_far += 1 logger.record_tabular("EpisodesSoFar", episodes_so_far) logger.record_tabular("TimestepsSoFar", timesteps_so_far) logger.record_tabular("TimeElapsed", time.time() - tstart) if MPI.COMM_WORLD.Get_rank() == 0:
logger.dump_tabular() if iters_so_far > 1: U.save_state(model_dir + model_path + str(iters_so_far)) print('out of time') return_routine(pi, d, batch, output_prefix, losses, cur_lrmult, lossandgradandhessian, gradients, hessians, gradient_set) return pi
def create_model(inputs1, inputs2, targets): def create_discriminator(discrim_inputs, discrim_targets): n_layers = 3 layers = [] input = tf.concat([discrim_inputs, discrim_targets], 3) with tf.variable_scope("layer_1"): convolved = conv(input, 3, a.ndf, 2) rectified = lrelu(convolved, 0.2) layers.append(rectified) for i in range(n_layers): with tf.variable_scope("layer_%d" % (len(layers) + 1)): out_channels = a.ndf * min(2**(i + 1), 8) stride = 1 if i == n_layers - 1 else 2 # last layer here has stride 1 convolved = conv(layers[-1], 3, out_channels, stride=stride) rectified = lrelu(convolved, 0.2) layers.append(rectified) with tf.variable_scope("layer_%d" % (len(layers) + 1)): convolved = conv(rectified, 3, 1, 1) output = tf.sigmoid(convolved) layers.append(output) return layers[-1] with tf.variable_scope("generator") as scope: out_channels = int(targets.get_shape()[-1]) outputs = create_generator(inputs1, inputs2, out_channels) with tf.name_scope("real_discriminator"): with tf.variable_scope("discriminator"): predict_real = create_discriminator(inputs1, targets) with tf.name_scope("fake_discriminator"): with tf.variable_scope("discriminator", reuse=True): predict_fake = create_discriminator(inputs1, outputs) with tf.name_scope("discriminator_loss"): discrim_loss = tf.reduce_mean(-(tf.log(predict_real + EPS) + tf.log(1 - predict_fake + EPS))) with tf.name_scope("generator_loss"): gen_loss_GAN = tf.reduce_mean(-tf.log(predict_fake + EPS)) gen_loss_L1 = tf.reduce_mean(tf.abs(targets - outputs)) gen_loss = gen_loss_GAN * a.gan_weight + gen_loss_L1 * a.l1_weight with tf.name_scope("discriminator_train"): discrim_tvars = [ var for var in tf.trainable_variables() if var.name.startswith("discriminator") ] discrim_optim = tf.train.AdamOptimizer(a.lr, a.beta1) discrim_grads_and_vars = discrim_optim.compute_gradients( discrim_loss, var_list=discrim_tvars) discrim_train = discrim_optim.apply_gradients(discrim_grads_and_vars) with tf.name_scope("generator_train"): with tf.control_dependencies([discrim_train]): gen_tvars = [ var for var in tf.trainable_variables() if var.name.startswith("generator") ] gen_optim = tf.train.AdamOptimizer(a.lr, a.beta1) gen_grads_and_vars = gen_optim.compute_gradients( gen_loss, var_list=gen_tvars) gen_train = gen_optim.apply_gradients(gen_grads_and_vars) ema = tf.train.ExponentialMovingAverage(decay=0.99) update_losses = ema.apply([discrim_loss, gen_loss_GAN, gen_loss_L1]) global_step = tf.contrib.framework.get_or_create_global_step() incr_global_step = tf.assign(global_step, global_step + 1) return Model( predict_real=predict_real, predict_fake=predict_fake, discrim_loss=ema.average(discrim_loss), discrim_grads_and_vars=discrim_grads_and_vars, gen_loss_GAN=ema.average(gen_loss_GAN), gen_loss_L1=ema.average(gen_loss_L1), gen_grads_and_vars=gen_grads_and_vars, outputs=outputs, train=tf.group(update_losses, incr_global_step, gen_train), )
dataset_full_path = dataset_path + ".txt" logs_dir = "./logs/" ################## n_inputs = 8 n_outputs = 1 hidden_layers_nodes = [20, 20] dropout_rate = [0.1, 0.1] learning_rate = 0.01 batch_size = 1 iterations = 200 ################## training = tf.Variable(True) mode = tf.placeholder(tf.bool) training_mode_op = tf.assign(training, mode) X = tf.placeholder(tf.float32, [None, n_inputs]) Y = tf.placeholder(tf.float32, [None, n_outputs]) sum_variables = [] # sum of all variables (each weight) cua_variables = [] # sum of all variables multiplied by itself (quadratic sum) p_variables = [] # % of variation of each variable weight_cp = [] # copy of last weights to view the % of variation denominador = 1 sum_var_str = [{}, {}, {}] iteration_save = 0 s = []
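# Usage sketch for the training_mode_op defined above: one Session.run per
# phase change flips the `training` variable through the `mode` placeholder.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(training_mode_op, feed_dict={mode: True})   # training phase
    # ... training iterations ...
    sess.run(training_mode_op, feed_dict={mode: False})  # inference phase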
raw_data = np.random.normal(10, 1, 100) alpha = tf.constant(0.05) beta = tf.Variable(1.0 - alpha) current_value = tf.placeholder(tf.float32) previous_average = tf.Variable(0.0) update_average = alpha * current_value + tf.multiply(beta, previous_average) average_history = tf.summary.scalar('running average', update_average) value_history = tf.summary.scalar('incoming values', current_value) merged = tf.summary.merge_all() log_folder = './logs/' writer = tf.summary.FileWriter(log_folder) initializer = tf.global_variables_initializer() with tf.Session() as session: session.run(initializer) writer.add_graph(session.graph) for index, value in enumerate(raw_data): feed_dict = {current_value: value} summary_string, current_average = session.run([merged, update_average], feed_dict=feed_dict) session.run(tf.assign(previous_average, current_average)) logger.debug('raw data: %.2f current average: %.2f' % (value, current_average)) writer.add_summary(summary_string, index) logger.debug('done') finish_time = time.time() elapsed_hours, elapsed_remainder = divmod(finish_time - start_time, 3600) elapsed_minutes, elapsed_seconds = divmod(elapsed_remainder, 60) logger.info("Time: {:0>2}:{:0>2}:{:05.2f}".format(int(elapsed_hours), int(elapsed_minutes), elapsed_seconds))
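# Variant sketch: the loop above rebuilds tf.assign(previous_average, ...) on
# every iteration, growing the graph. The state update can instead live in the
# graph, so a single run() both computes and stores the average (same tensors
# as above; ema_update / averaged are our names, built once):
ema_update = alpha * current_value + beta * previous_average
with tf.control_dependencies([tf.assign(previous_average, ema_update)]):
    averaged = tf.identity(ema_update)
# per step: session.run(averaged, feed_dict={current_value: value})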