def add_decoding_ops(self, language_model: str = None, lm_weight: float = 0.8,
                     word_count_weight: float = 0.0, valid_word_count_weight: float = 2.3):
    """
    Add the ops for decoding.

    Args:
      language_model: the file path to the language model to use for beam search decoding, or None
      lm_weight: the weight multiplied with the language model scoring
      word_count_weight: the weight added for each added word
      valid_word_count_weight: the weight added for each in-vocabulary word
    """
    with tf.name_scope('decoding'):
        self.lm_weight = tf.placeholder_with_default(
            lm_weight, shape=(), name='language_model_weight')
        self.word_count_weight = tf.placeholder_with_default(
            word_count_weight, shape=(), name='word_count_weight')
        self.valid_word_count_weight = tf.placeholder_with_default(
            valid_word_count_weight, shape=(), name='valid_word_count_weight')

        if language_model:
            # Convert the logits to base-10 log-probabilities before beam search decoding.
            self.softmaxed = tf.log(tf.nn.softmax(self.logits, name='softmax') + 1e-8) / math.log(10)
            self.decoded, self.log_probabilities = tf.nn.ctc_beam_search_decoder(
                self.softmaxed,
                self.sequence_lengths // 2,
                kenlm_directory_path=language_model,
                kenlm_weight=self.lm_weight,
                word_count_weight=self.word_count_weight,
                valid_word_count_weight=self.valid_word_count_weight,
                beam_width=100,
                merge_repeated=False,
                top_paths=1)
        else:
            self.decoded, self.log_probabilities = tf.nn.ctc_greedy_decoder(
                self.logits, self.sequence_lengths // 2, merge_repeated=True)
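# Hedged usage sketch: the weight placeholders above all have defaults, so they only
# need to be fed when tuning decoding at run time. `net`, `sess` and `input_feed`
# (the model's regular input feeds) are illustrative names, not from the original code.
decode_feed = dict(input_feed)
decode_feed[net.lm_weight] = 1.2              # try a stronger language model weight
decode_feed[net.word_count_weight] = 0.5      # encourage longer transcripts
decoded, log_probs = sess.run([net.decoded, net.log_probabilities], feed_dict=decode_feed)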
def __init__(self, dataset): self._data_set = dataset self.class_count = dataset.class_count self.lat_placeholder = tf.placeholder_with_default(tf.zeros([1], dtype=tf.float32), [None], name='lat_placeholder') self.lng_placeholder = tf.placeholder_with_default(tf.zeros([1], dtype=tf.float32), [None], name='lng_placeholder') self.week_placeholder = tf.placeholder_with_default(tf.zeros([1], dtype=tf.float32), [None], name='week_placeholder') self.ground_truth = tf.placeholder(tf.float32, [None, self.class_count])
def __init__(self, ntoken, ninp, nhid, nlayers, lr=0.001, dropout_ratio=0.5,
             clip_norm=0.5, **kwargs):
    """
    :param ntoken: number of features (input to the encoder)
    :param ninp: input size of the LSTM (output of the encoder)
    :param nhid: number of hidden units in the LSTM
    :param nlayers: number of layers
    :param lr: learning rate
    :param dropout_ratio: dropout rate
    :param clip_norm: gradient clipping norm
    """
    tf.reset_default_graph()
    self.data = tf.placeholder(tf.float32, [None, None, ntoken], name="data_")
    self.target = tf.placeholder(tf.float32, [None, None, ntoken], name="target_")
    self._ntoken = ntoken
    self._ninp = ninp
    self._nhid = nhid
    self._nlayers = nlayers
    # Setting to defaults known to work well; all three can be overridden via feed_dict.
    self._lr = tf.placeholder_with_default(lr, shape=None, name="learn_rate_")
    self._dropout_ratio = tf.placeholder_with_default(dropout_ratio, shape=None, name="dropout_ratio_")
    self._clip_norm = tf.placeholder_with_default(clip_norm, shape=None, name="clip_norm_")
    self.tf_init = tf.global_variables_initializer
    # Touch the (presumably lazily-built) properties so the graph ops are constructed.
    self.prediction
    self.loss
    self.optimize
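# Hedged usage sketch: since the learning rate, dropout ratio and clip norm are
# placeholder_with_default tensors, they can be overridden per training step. `model`,
# `sess`, `batch_x` and `batch_y` are illustrative names, not from the original code.
_, step_loss = sess.run(
    [model.optimize, model.loss],
    feed_dict={model.data: batch_x,
               model.target: batch_y,
               model._lr: 5e-4,             # override the default learning rate
               model._dropout_ratio: 0.3})  # override the default dropout ratio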
def test_expected_value(self): shape_ = np.array([2, int(1e3)], np.int32) shape = (tf.constant(shape_) if self.use_static_shape else tf.placeholder_with_default(shape_, shape=None)) # This shape will require broadcasting before sampling. scale_ = np.linspace(0.1, 0.5, 3 * 2).astype(self.dtype).reshape(3, 2) scale = (tf.constant(scale_) if self.use_static_shape else tf.placeholder_with_default(scale_, shape=None)) x = tfp.math.random_rayleigh(shape, scale=scale[..., tf.newaxis], dtype=self.dtype, seed=42) self.assertEqual(self.dtype, x.dtype.as_numpy_dtype) final_shape_ = [3, 2, int(1e3)] if self.use_static_shape: self.assertAllEqual(final_shape_, x.shape) sample_mean = tf.reduce_mean(x, axis=-1, keepdims=True) sample_var = tf.reduce_mean(tf.squared_difference( x, sample_mean), axis=-1) [x_, sample_mean_, sample_var_] = self.evaluate([ x, sample_mean[..., 0], sample_var]) self.assertAllEqual(final_shape_, x_.shape) self.assertAllEqual(np.ones_like(x_, dtype=np.bool), x_ > 0.) self.assertAllClose(np.sqrt(np.pi / 2.) * scale_, sample_mean_, atol=0.05, rtol=0.) self.assertAllClose(0.5 * (4. - np.pi) * scale_**2., sample_var_, atol=0.05, rtol=0.)
def placeholders(self): self._imfiles = tf.placeholder(dtype=tf.string, shape=[None, None], name="image_files") self._commands = tf.placeholder(dtype=tf.float32, shape=[None, None, ds.NUM_COMMANDS], name="commands") self._sqlen = tf.placeholder_with_default(1, shape=[], name="sequence_length") self._bsize = tf.placeholder_with_default(1, shape=[], name="batch_size") self._keep_prob = tf.placeholder_with_default(1.0, shape=[], name="keep_prob") tf.add_to_collection("placeholders", self._imfiles) tf.add_to_collection("placeholders", self._commands) tf.add_to_collection("placeholders", self._sqlen) tf.add_to_collection("placeholders", self._bsize) tf.add_to_collection("placeholders", self._keep_prob) return (self._imfiles, self._commands, self._sqlen, self._bsize, self._keep_prob)
def _test_partial_shape_correctness(self, input, rank, batch_size, grid, interpolation, boundary, expected_value=None): resampler = ResamplerLayer(interpolation=interpolation, boundary=boundary) input_default = tf.random_uniform(input.shape) if batch_size > 0 and rank > 0: input_placeholder = tf.placeholder_with_default( input_default, shape=[batch_size] + [None] * (rank + 1)) elif batch_size <= 0 and rank > 0: input_placeholder = tf.placeholder_with_default( input_default, shape=[None] * (rank + 2)) elif batch_size <= 0 and rank <= 0: input_placeholder = tf.placeholder_with_default( input_default, shape=None) out = resampler(input_placeholder, grid) with self.test_session() as sess: out_value = sess.run( out, feed_dict={input_placeholder: input}) # print(expected_value) # print(out_value) if expected_value is not None: self.assertAllClose(expected_value, out_value)
def test_bad_reshape_size(self): dims = 2 new_batch_shape = [2, 3] old_batch_shape = [2] # 2 != 2*3 new_batch_shape_ph = ( tf.constant(np.int32(new_batch_shape)) if self.is_static_shape else tf.placeholder_with_default( np.int32(new_batch_shape), shape=None)) scale = np.ones(old_batch_shape + [dims], self.dtype) scale_ph = tf.placeholder_with_default( scale, shape=scale.shape if self.is_static_shape else None) mvn = tfd.MultivariateNormalDiag(scale_diag=scale_ph) if self.is_static_shape: with self.assertRaisesRegexp( ValueError, (r"`batch_shape` size \(6\) must match " r"`distribution\.batch_shape` size \(2\)")): tfd.BatchReshape( distribution=mvn, batch_shape=new_batch_shape_ph, validate_args=True) else: with self.assertRaisesOpError(r"Shape sizes do not match."): self.evaluate( tfd.BatchReshape( distribution=mvn, batch_shape=new_batch_shape_ph, validate_args=True).sample())
def test_non_vector_shape(self): dims = 2 new_batch_shape = 2 old_batch_shape = [2] new_batch_shape_ph = ( tf.constant(np.int32(new_batch_shape)) if self.is_static_shape else tf.placeholder_with_default( np.int32(new_batch_shape), shape=None)) scale = np.ones(old_batch_shape + [dims], self.dtype) scale_ph = tf.placeholder_with_default( scale, shape=scale.shape if self.is_static_shape else None) mvn = tfd.MultivariateNormalDiag(scale_diag=scale_ph) if self.is_static_shape: with self.assertRaisesRegexp(ValueError, r".*must be a vector.*"): tfd.BatchReshape( distribution=mvn, batch_shape=new_batch_shape_ph, validate_args=True) else: with self.assertRaisesOpError(r".*must be a vector.*"): self.evaluate( tfd.BatchReshape( distribution=mvn, batch_shape=new_batch_shape_ph, validate_args=True).sample())
def test_non_positive_shape(self): dims = 2 old_batch_shape = [4] if self.is_static_shape: # Unknown first dimension does not trigger size check. Note that # any dimension < 0 is treated statically as unknown. new_batch_shape = [-1, 0] else: new_batch_shape = [-2, -2] # -2 * -2 = 4, same size as the old shape. new_batch_shape_ph = ( tf.constant(np.int32(new_batch_shape)) if self.is_static_shape else tf.placeholder_with_default( np.int32(new_batch_shape), shape=None)) scale = np.ones(old_batch_shape + [dims], self.dtype) scale_ph = tf.placeholder_with_default( scale, shape=scale.shape if self.is_static_shape else None) mvn = tfd.MultivariateNormalDiag(scale_diag=scale_ph) if self.is_static_shape: with self.assertRaisesRegexp(ValueError, r".*must be >=-1.*"): tfd.BatchReshape( distribution=mvn, batch_shape=new_batch_shape_ph, validate_args=True) else: with self.assertRaisesOpError(r".*must be >=-1.*"): self.evaluate( tfd.BatchReshape( distribution=mvn, batch_shape=new_batch_shape_ph, validate_args=True).sample())
def test_batch_vector_sampaxis03_eventaxis12_dynamic(self): # x.shape = sample, event, event, sample, batch x = rng.randn(2, 3, 4, 5, 6) y = x + 0.1 * rng.randn(2, 3, 4, 5, 6) x_ph = tf.placeholder_with_default(input=x, shape=None) y_ph = tf.placeholder_with_default(input=y, shape=None) cov = tfp.stats.covariance( x_ph, y_ph, sample_axis=[0, 3], event_axis=[1, 2]) cov = self.evaluate(cov) self.assertAllEqual((3, 4, 3, 4, 6), cov.shape) cov_kd = tfp.stats.covariance( x_ph, y_ph, sample_axis=[0, 3], event_axis=[1, 2], keepdims=True) cov_kd = self.evaluate(cov_kd) self.assertAllEqual((1, 3, 4, 3, 4, 1, 6), cov_kd.shape) self.assertAllEqual(cov, cov_kd[0, :, :, :, :, 0, :]) for i in range(6): # Iterate over batch index. # Get ith batch of samples, and permute/reshape to [n_samples, n_events] x_i = np.reshape( np.transpose(x[:, :, :, :, i], [0, 3, 1, 2]), [2 * 5, 3 * 4]) y_i = np.reshape( np.transpose(y[:, :, :, :, i], [0, 3, 1, 2]), [2 * 5, 3 * 4]) # Will compare with ith batch of covariance. cov_i = np.reshape(cov[..., i], [3 * 4, 3 * 4]) for m in range(0, 3 * 4, 3): # Iterate over some rows of matrix for n in range(0, 3 * 4, 3): # Iterate over some columns of matrix self.assertAllClose( self._np_cov_1d(x_i[:, m], y_i[:, n]), cov_i[m, n])
def test_broadcasting_explicitly_unsupported(self): old_batch_shape = [4] new_batch_shape = [1, 4, 1] rate_ = self.dtype([1, 10, 2, 20]) rate = tf.placeholder_with_default( rate_, shape=old_batch_shape if self.is_static_shape else None) poisson_4 = tfd.Poisson(rate) new_batch_shape_ph = ( tf.constant(np.int32(new_batch_shape)) if self.is_static_shape else tf.placeholder_with_default( np.int32(new_batch_shape), shape=None)) poisson_141_reshaped = tfd.BatchReshape( poisson_4, new_batch_shape_ph, validate_args=True) x_4 = self.dtype([2, 12, 3, 23]) x_114 = self.dtype([2, 12, 3, 23]).reshape(1, 1, 4) if self.is_static_shape: with self.assertRaisesRegexp(NotImplementedError, "too few batch and event dims"): poisson_141_reshaped.log_prob(x_4) with self.assertRaisesRegexp(NotImplementedError, "unexpected batch and event shape"): poisson_141_reshaped.log_prob(x_114) return with self.assertRaisesOpError("too few batch and event dims"): self.evaluate(poisson_141_reshaped.log_prob(x_4)) with self.assertRaisesOpError("unexpected batch and event shape"): self.evaluate(poisson_141_reshaped.log_prob(x_114))
def setup_val(self, tfname):
    self.restore = glob(os.path.join(self.checkpoint8, "FCN__*", "*.data*"))[0].split(".data")[0]
    filename_queue = tf.train.string_input_producer([tfname], num_epochs=10)
    self.image_queue, self.annotation_queue = read_tfrecord_and_decode_into_image_annotation_pair_tensors(filename_queue)
    # Default to the decoded queue tensors, but allow images/annotations to be fed directly.
    self.image = tf.placeholder_with_default(self.image_queue, shape=[None, None, 3])
    self.annotation = tf.placeholder_with_default(self.annotation_queue, shape=[None, None, 1])
    self.resized_image, resized_annotation = scale_randomly_image_with_annotation_with_fixed_size_output(
        self.image, self.annotation, (self.size, self.size))
    self.resized_annotation = tf.squeeze(resized_annotation)
    image_batch_tensor = tf.expand_dims(self.image, axis=0)
    annotation_batch_tensor = tf.expand_dims(self.annotation, axis=0)
    # Be careful: after adaptation, the network returns final labels and not logits.
    FCN_8s_bis = adapt_network_for_any_size_input(FCN_8s, 32)
    self.pred, fcn_16s_variables_mapping = FCN_8s_bis(image_batch_tensor=image_batch_tensor,
                                                      number_of_classes=self.num_labels,
                                                      is_training=False)
    self.prob = [h for h in [s for s in [t for t in self.pred.op.inputs][0].op.inputs][0].op.inputs][0]
    initializer = tf.local_variables_initializer()
    self.saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(initializer)
        self.saver.restore(sess, self.restore)
def _testScaledIdentityComplexAdjoint(self, is_dynamic): shift_ = np.array(-0.5, dtype=np.complex) scale_ = np.array(4 + 2j, dtype=np.complex) shift = tf.placeholder_with_default( shift_, shape=None if is_dynamic else []) scale = tf.placeholder_with_default( scale_, shape=None if is_dynamic else []) bijector = tfb.Affine( shift=shift, scale_identity_multiplier=scale, adjoint=True, validate_args=True) z = np.array([1., 2, 3], dtype=np.complex) y = bijector.forward(z) x = bijector.inverse(z) inv_fwd_z = bijector.inverse(tf.identity(y)) ildj = bijector.inverse_log_det_jacobian(z, event_ndims=1) fldj = bijector.forward_log_det_jacobian(z, event_ndims=1) [x_, y_, inv_fwd_z_, ildj_, fldj_] = self.evaluate([ x, y, inv_fwd_z, ildj, fldj]) self.assertAllClose(np.conj(scale_) * z + shift_, y_) self.assertAllClose((z - shift_) / np.conj(scale_), x_) self.assertAllClose(z, inv_fwd_z_) self.assertAllClose(z.shape[-1] * np.log(np.abs(scale_)), fldj_) self.assertAllClose(-z.shape[-1] * np.log(np.abs(scale_)), ildj_)
def test_non_scalar_transition_batch(self): initial_prob_ = tf.constant([0.6, 0.4], dtype=self.dtype) transition_matrix_ = tf.constant([0.6, 0.4], dtype=self.dtype) observation_locs_ = tf.constant(0.0, dtype=self.dtype) observation_scale_ = tf.constant(0.5, dtype=self.dtype) initial_prob = tf.placeholder_with_default(initial_prob_, shape=None) transition_matrix = tf.placeholder_with_default(transition_matrix_, shape=None) observation_locs = tf.placeholder_with_default(observation_locs_, shape=None) observation_scale = tf.placeholder_with_default(observation_scale_, shape=None) with self.assertRaisesWithPredicateMatch( Exception, lambda e: "scalar batches" in str(e)): model = tfd.HiddenMarkovModel(tfd.Categorical(probs=initial_prob), tfd.Categorical(probs=transition_matrix), tfd.Normal(observation_locs, scale=observation_scale), num_steps=4, validate_args=True) self.evaluate(model.mean())
def test_consistency(self): initial_prob_ = tf.constant([0.6, 0.4], dtype=self.dtype) transition_matrix_ = tf.constant([[0.6, 0.4], [0.3, 0.7]], dtype=self.dtype) observation_locs_ = tf.constant([0.0, 1.0], dtype=self.dtype) observation_scale_ = tf.constant(0.5, dtype=self.dtype) initial_prob = tf.placeholder_with_default(initial_prob_, shape=None) transition_matrix = tf.placeholder_with_default(transition_matrix_, shape=None) observation_locs = tf.placeholder_with_default(observation_locs_, shape=None) observation_scale = tf.placeholder_with_default(observation_scale_, shape=None) model = tfd.HiddenMarkovModel(tfd.Categorical(probs=initial_prob), tfd.Categorical(probs=transition_matrix), tfd.Normal(loc=observation_locs, scale=observation_scale), num_steps=3, validate_args=True) self.run_test_sample_consistent_log_prob(self.evaluate, model, num_samples=100000, center=0.5, radius=0.5, rtol=0.05)
def testScaledDotAttention(self): batch_size = 3 num_heads = 8 values_length = [5, 3, 7] queries_length = [8, 6, 10] depth = 20 queries = tf.placeholder_with_default( np.random.randn(batch_size, num_heads, max(queries_length), depth).astype(np.float32), shape=(None, num_heads, None, depth)) values = tf.placeholder_with_default( np.random.randn(batch_size, num_heads, max(values_length), depth).astype(np.float32), shape=(None, num_heads, None, depth)) keys = values mask = transformer.build_sequence_mask(values_length, num_heads=num_heads) context, attn = transformer.dot_product_attention( queries, keys, values, tf.estimator.ModeKeys.PREDICT, mask=mask) with self.test_session() as sess: context, attn = sess.run([context, attn]) self.assertTupleEqual( (batch_size, num_heads, max(queries_length), depth), context.shape) self.assertTupleEqual( (batch_size, num_heads, max(queries_length), max(values_length)), attn.shape) for i in range(batch_size): length = values_length[i] padding_length = max(values_length) - length if padding_length > 0: self.assertEqual(0.0, np.sum(attn[i, :, :, length:max(values_length)]))
def testShapeGettersWithDynamicShape(self): x = tf.placeholder_with_default(input=[2, 4], shape=None) y = tf.placeholder_with_default(input=[2, 5], shape=None) bijector = tfb.SoftmaxCentered(validate_args=True) self.assertAllEqual( [2, 5], self.evaluate(bijector.forward_event_shape_tensor(x))) self.assertAllEqual( [2, 4], self.evaluate(bijector.inverse_event_shape_tensor(y)))
def testHandlesNonStaticEventNdims(self): x_ = [[[1., 2.], [3., 4.]]] x = tf.placeholder_with_default(x_, shape=None) event_ndims = tf.placeholder_with_default(1, shape=None) bij = ExpOnlyJacobian(forward_min_event_ndims=1) bij.inverse_log_det_jacobian(x, event_ndims=event_ndims) ildj = self.evaluate( bij.inverse_log_det_jacobian(x, event_ndims=event_ndims)) self.assertAllClose(-np.log(x_), ildj)
def testMeanVariance(self): pln = tfd.PoissonLogNormalQuadratureCompound( loc=tf.placeholder_with_default( 0., shape=[] if self.static_shape else None), scale=tf.placeholder_with_default( 1., shape=[] if self.static_shape else None), quadrature_size=10, validate_args=True) self.run_test_sample_consistent_mean_variance(self.evaluate, pln, rtol=0.02)
def _construct_nn(self, use_batch_norm, seperate_validation): tf.reset_default_graph() clear_start([self._ld]) if self._random_state is not None: if self._verbose: print('seed is fixed to {}'.format(self._random_state)) tf.set_random_seed(self._random_state) np.random.seed(self._random_state) layers = [] self._input_ph = tf.placeholder(tf.float32, shape=[None, self.structure[0]], name='input') self._dropout_keep_rate = tf.placeholder_with_default(1., shape=None, name='keep_rate') self._train_mode = tf.placeholder_with_default(False, shape=None, name='train_mode') layers.append(self._input_ph) j = 1 with tf.variable_scope('autoencoder'): for i, n_neurons in enumerate(self.structure[1:-1]): if j == 1: x = tf.layers.dense(self._input_ph, n_neurons, name='hidden_%s' % j, kernel_initializer=tf.truncated_normal_initializer()) else: x = tf.layers.dense(x, n_neurons, name='hidden_%s' % j, kernel_initializer=tf.truncated_normal_initializer()) if use_batch_norm: x = tf.layers.batch_normalization(x, axis=1, training=self._train_mode, scale=False) layers.append(x) x = self.activation_fn(x) layers.append(x) x = tf.layers.dropout(x, tf.subtract(1., self._dropout_keep_rate), name='dropout_%s' % j) layers.append(x) if j == self.encoding_layer_index: x = tf.identity(x, name='encoding') self._encoding = x j += 1 self._output = tf.layers.dense(x, self.structure[-1], name='output', kernel_initializer=tf.truncated_normal_initializer()) self._labels = tf.placeholder(tf.float32, shape=[None, self.structure[-1]], name='label') layers.append(self._output) if self._cpu_only: with tf.device('/cpu:{}'.format(self._cpu_number)): sess = tf.Session(config=self._config) if seperate_validation: self._train_writer = tf.summary.FileWriter(self._ld + 'train/', sess.graph) self._val_writer = tf.summary.FileWriter(self._ld + 'val/', sess.graph) else: self._train_writer = tf.summary.FileWriter(self._ld, sess.graph) else: with tf.device('/gpu:{}'.format(self._gpu_number)): sess = tf.Session(config=self._config) if seperate_validation: self._train_writer = tf.summary.FileWriter(self._ld + 'train/', sess.graph) self._val_writer = tf.summary.FileWriter(self._ld + 'val/') else: self._train_writer = tf.summary.FileWriter(self._ld, sess.graph) self._sess = sess self._network = layers
def testSampleDynamicWithBatchDims(self): loc = tf.placeholder_with_default(input=[[0.], [0.]], shape=[2, 1]) deterministic = tfd.VectorDeterministic(loc) for sample_shape_ in [(), (4,)]: sample_shape = tf.placeholder_with_default( input=np.array(sample_shape_, dtype=np.int32), shape=None) sample_ = self.evaluate(deterministic.sample(sample_shape)) self.assertAllClose( np.zeros(sample_shape_ + (2, 1)).astype(np.float32), sample_)
def testCDFWithDynamicEventShapeUnknownNdims( self, events, histograms, expected_cdf): """Test that dynamically-sized events with unknown shape work.""" event_ph = tf.placeholder_with_default(events, shape=None) histograms_ph = tf.placeholder_with_default(histograms, shape=None) dist = categorical.Categorical(probs=histograms_ph) cdf_op = dist.cdf(event_ph) actual_cdf = self.evaluate(cdf_op) self.assertAllClose(actual_cdf, expected_cdf)
def testSampleProbConsistentBroadcastScalar(self): pln = tfd.PoissonLogNormalQuadratureCompound( loc=tf.placeholder_with_default( [0., -0.5], shape=[2] if self.static_shape else None), scale=tf.placeholder_with_default( 1., shape=[] if self.static_shape else None), quadrature_size=10, validate_args=True) self.run_test_sample_consistent_log_prob( self.evaluate, pln, batch_size=2, rtol=0.1, atol=0.01)
def testMeanVarianceBroadcastBoth(self): pln = tfd.PoissonLogNormalQuadratureCompound( loc=tf.placeholder_with_default( [[0.], [-0.5]], shape=[2, 1] if self.static_shape else None), scale=tf.placeholder_with_default( [[1., 0.9]], shape=[1, 2] if self.static_shape else None), quadrature_size=10, validate_args=True) self.run_test_sample_consistent_mean_variance( self.evaluate, pln, rtol=0.1, atol=0.01)
def test_broadcast_params_dynamic(self): loc = tf.placeholder_with_default(input=rng.rand(5), shape=None) scale = tf.placeholder_with_default( input=np.float64(rng.rand()), shape=None) skewness = tf.placeholder_with_default(input=rng.rand(5), shape=None) sasnorm = tfd.SinhArcsinh( loc=loc, scale=scale, skewness=skewness, validate_args=True) samp = self.evaluate(sasnorm.sample()) self.assertAllEqual((5,), samp.shape)
def testCopy(self): # 5 random index points in R^2 index_points_1 = np.random.uniform(-4., 4., (5, 2)).astype(np.float32) # 10 random index points in R^2 index_points_2 = np.random.uniform(-4., 4., (10, 2)).astype(np.float32) # ==> shape = [6, 25, 2] if not self.is_static: index_points_1 = tf.placeholder_with_default(index_points_1, shape=None) index_points_2 = tf.placeholder_with_default(index_points_2, shape=None) mean_fn = lambda x: np.array([0.], np.float32) kernel_1 = psd_kernels.ExponentiatedQuadratic() kernel_2 = psd_kernels.ExpSinSquared() tp1 = tfd.StudentTProcess( df=3., kernel=kernel_1, index_points=index_points_1, mean_fn=mean_fn, jitter=1e-5) tp2 = tp1.copy(df=4., index_points=index_points_2, kernel=kernel_2) event_shape_1 = [5] event_shape_2 = [10] self.assertEqual(tp1.mean_fn, tp2.mean_fn) self.assertIsInstance(tp1.kernel, psd_kernels.ExponentiatedQuadratic) self.assertIsInstance(tp2.kernel, psd_kernels.ExpSinSquared) if self.is_static or tf.executing_eagerly(): self.assertAllEqual(tp1.batch_shape, tp2.batch_shape) self.assertAllEqual(tp1.event_shape, event_shape_1) self.assertAllEqual(tp2.event_shape, event_shape_2) self.assertEqual(self.evaluate(tp1.df), 3.) self.assertEqual(self.evaluate(tp2.df), 4.) self.assertAllEqual(tp2.index_points, index_points_2) self.assertAllEqual(tp1.index_points, index_points_1) self.assertAllEqual(tp2.index_points, index_points_2) self.assertAllEqual( tf.contrib.util.constant_value(tp1.jitter), tf.contrib.util.constant_value(tp2.jitter)) else: self.assertAllEqual( self.evaluate(tp1.batch_shape_tensor()), self.evaluate(tp2.batch_shape_tensor())) self.assertAllEqual( self.evaluate(tp1.event_shape_tensor()), event_shape_1) self.assertAllEqual( self.evaluate(tp2.event_shape_tensor()), event_shape_2) self.assertEqual(self.evaluate(tp1.jitter), self.evaluate(tp2.jitter)) self.assertEqual(self.evaluate(tp1.df), 3.) self.assertEqual(self.evaluate(tp2.df), 4.) self.assertAllEqual(self.evaluate(tp1.index_points), index_points_1) self.assertAllEqual(self.evaluate(tp2.index_points), index_points_2)
def test_compute_gradients_no_initial_gradients(self): x_ = np.random.rand(1000, 100).astype(self.dtype.as_numpy_dtype) x = tf.placeholder_with_default(x_, shape=x_.shape) y_ = np.random.rand(100, 1).astype(self.dtype.as_numpy_dtype) y = tf.placeholder_with_default(y_, shape=y_.shape) f = lambda x, y: tf.matmul(x, y) # pylint: disable=unnecessary-lambda dfdx, dfdy = self.compute_gradients(f, [x, y]) expected_dfdx = np.transpose(y_) + np.zeros_like(x_) expected_dfdy = np.transpose(np.sum(x_, axis=0, keepdims=True)) self.assertAllClose(dfdx, expected_dfdx, atol=0., rtol=1e-4) self.assertAllClose(dfdy, expected_dfdy, atol=0., rtol=1e-4)
def testBijectorDynamicEventNdims(self): with self.assertRaisesError("Expected scalar"): bij = BrokenBijector(validate_args=True) event_ndims = tf.placeholder_with_default((1, 2), shape=None) self.evaluate( bij.forward_log_det_jacobian(1., event_ndims=event_ndims)) with self.assertRaisesError("Expected scalar"): bij = BrokenBijector(validate_args=True) event_ndims = tf.placeholder_with_default((1, 2), shape=None) self.evaluate( bij.inverse_log_det_jacobian(1., event_ndims=event_ndims))
def _init_placeholders(self): self.c_ph = tf.placeholder(shape=(None, None), dtype=tf.int32, name='c_ph') self.cc_ph = tf.placeholder(shape=(None, None, self.char_limit), dtype=tf.int32, name='cc_ph') self.q_ph = tf.placeholder(shape=(None, None), dtype=tf.int32, name='q_ph') self.qc_ph = tf.placeholder(shape=(None, None, self.char_limit), dtype=tf.int32, name='qc_ph') self.y1_ph = tf.placeholder(shape=(None, ), dtype=tf.int32, name='y1_ph') self.y2_ph = tf.placeholder(shape=(None, ), dtype=tf.int32, name='y2_ph') self.lear_rate_ph = tf.placeholder_with_default(0.0, shape=[], name='learning_rate') self.keep_prob_ph = tf.placeholder_with_default(1.0, shape=[], name='keep_prob_ph') self.is_train_ph = tf.placeholder_with_default(False, shape=[], name='is_train_ph')
def build_inputs(self, full_batch_shape, partial_batch_shape): full_batch_shape = tf.placeholder_with_default( input=np.asarray(full_batch_shape, dtype=np.int32), shape=None) partial_batch_shape = tf.placeholder_with_default( input=np.asarray(partial_batch_shape, dtype=np.int32), shape=None) dist = tfd.Normal(tf.random_normal(partial_batch_shape), 1.) return full_batch_shape, dist
def convnn(x, channels_num, layers_num):
    # First convolutional layer
    input_dimensions = x.get_shape().as_list()[1:]
    filter_shape = [window_size, input_dimensions[-1], channels_num]
    W = weight_variable(filter_shape)
    b = bias_variable([input_dimensions[0], channels_num])
    layers = []
    layers.append(tf.nn.relu(conv1d(x, W) + b))

    # Hidden layers (weights and biases are shared across the stacked hidden convolutions)
    filter_shape = [window_size, channels_num, channels_num]
    W_hidden = weight_variable(filter_shape)
    b_hidden = bias_variable([input_dimensions[0], channels_num])
    # Create the keep-probability placeholder once, not once per hidden layer.
    keep_prob = tf.placeholder_with_default(1.0, shape=())
    for i in range(layers_num):
        conv_layer = tf.nn.relu(conv1d(layers[i], W_hidden) + b_hidden)
        # tf.layers.dropout expects a drop rate, so convert from a keep probability.
        dropout = tf.layers.dropout(conv_layer, rate=1.0 - keep_prob)
        layers.append(dropout)

    # Output convolutional layer
    filter_shape = [window_size, channels_num, 4]
    W_output = weight_variable(filter_shape)
    b_output = bias_variable([4])
    layer_out = conv1d(layers[-1], W_output) + b_output

    # L2 regularization term; the shared hidden weights are counted once per hidden layer.
    regularizers = tf.nn.l2_loss(W) + layers_num * tf.nn.l2_loss(W_hidden) + tf.nn.l2_loss(W_output)
    return layer_out, regularizers
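# Hedged usage sketch: the returned regularization term is meant to be scaled by a small
# coefficient (the original comment mentions beta=0.001) and added to the data loss.
# `labels` and the concrete layer sizes are illustrative, not from the original code.
logits, regularizers = convnn(x, channels_num=32, layers_num=3)
beta = 0.001
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=logits))
loss = cross_entropy + beta * regularizers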
def rnn_sequence_length(self): return tf.placeholder_with_default( tf.tile(tf.ones([1]), multiples=[self.batch_size]) * self.max_timesteps, shape=[None])
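# Hedged usage sketch: the default above is a length-batch_size float vector filled with
# max_timesteps, so it only needs to be fed for shorter sequences. `model`, `sess` and
# `outputs` are illustrative names that do not appear in the original code.
seq_len = model.rnn_sequence_length()
out = sess.run(outputs, feed_dict={seq_len: [50., 32., 47.]})  # per-example lengths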
y = tf.nn.softmax(logits_, name='ybar') if logits: return y, logits_ return y class Dummy: pass env = Dummy() with tf.variable_scope('model'): env.x = tf.placeholder(tf.float32, (None, img_size, img_size, img_chan), name='x') env.y = tf.placeholder(tf.float32, (None, n_classes), name='y') env.training = tf.placeholder_with_default(False, (), name='mode') env.ybar, logits = model(env.x, logits=True, training=env.training) with tf.variable_scope('acc'): count = tf.equal(tf.argmax(env.y, axis=1), tf.argmax(env.ybar, axis=1)) env.acc = tf.reduce_mean(tf.cast(count, tf.float32), name='acc') with tf.variable_scope('loss'): xent = tf.nn.softmax_cross_entropy_with_logits(labels=env.y, logits=logits) env.loss = tf.reduce_mean(xent, name='loss') with tf.variable_scope('train_op'): optimizer = tf.train.AdamOptimizer() vs = tf.global_variables()
def main(clean_dir, gen_records, is_laptop, num_epochs, val_start_epoch, summary_start_epoch, train_val_test_split, model_file): if len(train_val_test_split) != 3 or sum(train_val_test_split) != 1: print("ERROR - Train + Val + Test should equal 1") return # In general it is considered good practice to use list comprehension instead of map 99% of the time. # If values are being printed using logging.info, need to set logging verbosity to INFO level or training loss will not print # I'm using a custom printing function, so this does not need to be done # tf.logging.set_verbosity(tf.logging.INFO) tf.logging.set_verbosity(tf.logging.WARN) # Training data needs to be split into training, validation, and testing sets # This needs to be a complete (not relative) path, or glob will run into issues train_frac, val_frac, test_frac = train_val_test_split cat_dog_train_path = '/home/michael/Documents/DataSets/dogs_vs_cats_data/*.jpg' if is_laptop else '/home/michael/hard_drive/datasets/dogs_vs_cats_data/train/*.jpg' training_batch_size = 1 if is_laptop else 110 validation_save_path = create_val_dir() model_dir = 'cat_dog_cnn_laptop' if is_laptop else 'cat_dog_cnn_desktop' ckpt_path = None if model_file: ckpt_path = 'models/{}/{}'.format( model_dir, model_file) if is_laptop else 'models/{}/{}'.format( model_dir, model_file) if gen_records: clear_old_tfrecords() generate_tfrecords(cat_dog_train_path, train_frac, val_frac, test_frac) if clean_dir: clean_model_dir() # A good way to debug programs like this is to run a tf.InteractiveSession() # sess = tf.InteractiveSession() # next_example, next_label = imgs_input_fn(['train_0.tfrecords'], 'train', perform_shuffle=True, repeat_count=5, batch_size=20) train_records, train_record_lengths = get_tfrecords('train') val_records, val_record_lengths = get_tfrecords('val') # Multiplied by 0.6 because training files are 60% of data total_training_files = int(len(glob.glob(cat_dog_train_path)) * train_frac) total_num_steps = int(total_training_files / training_batch_size) print( "TOTAL FILES: {}, NUM_ROTATIONS: {}, TOTAL TRAINING FILES: {}, TOTAL NUM STEPS {}" .format(len(cat_dog_train_path), 1, total_training_files, total_num_steps)) model_fn = fast_cnn_model_fn if is_laptop else cnn_model_fn # Tensorflow importing datasets: https://www.tensorflow.org/programmers_guide/datasets # Random shit on protobuf's queues: https://indico.io/tensorflow-data-inputs-part1-placeholders-protobufs-queues/ tf.reset_default_graph() sess = tf.InteractiveSession() # repeat_count=-1 repeats the dataset indefinitely next_example, next_label = imgs_input_fn(train_records, 'train', perform_shuffle=True, repeat_count=-1, batch_size=training_batch_size) next_val_example, next_val_label = imgs_input_fn(val_records, 'val', perform_shuffle=False, repeat_count=-1, batch_size=VAL_BATCH_SIZE) # Prob going to want to read things like input sizes from a config file (keep things consistent between preparing data, and training the network) image_batch = tf.placeholder_with_default(next_example, shape=[None, 80, 80, 3]) label_batch = tf.placeholder_with_default(next_label, shape=[None, 2]) image_val_batch = tf.placeholder_with_default(next_val_example, shape=[None, 80, 80, 3]) label_val_batch = tf.placeholder_with_default(next_val_label, shape=[None, 2]) # Cannot change histogram summary and then reload model from the same checkpoint loss, predictions = model_fn(image_batch, label_batch, mode=tf.estimator.ModeKeys.TRAIN, params={ "return_estimator": False, "total_num_steps": total_num_steps, 
"histogram_summary": False, "loss_summary": True, "show_graph": True }) optimizer = tf.train.AdamOptimizer() training_op = optimizer.minimize(loss, name="training_op") num_steps = get_num_steps(train_record_lengths, training_batch_size, DATA_REPETITIONS_PER_EPOCH) print("Train record lengths: {}".format(train_record_lengths)) print("Val record lengths: {}".format(val_record_lengths)) num_val_steps = get_num_steps(val_record_lengths, VAL_BATCH_SIZE, 1) os.makedirs("tf_summaries/train", exist_ok=True) os.makedirs("tf_summaries/val", exist_ok=True) merged = tf.summary.merge_all() train_writer = tf.summary.FileWriter('tf_summaries/train', sess.graph) test_writer = tf.summary.FileWriter('tf_summaries/val') print("num_steps: {}".format(num_val_steps)) train_model(sess, num_steps, num_epochs, image_batch, label_batch, loss, predictions, training_op, num_val_steps, image_val_batch, label_val_batch, validation_save_path, merged, train_writer, test_writer, ckpt_path, model_dir, val_start_epoch, summary_start_epoch)
encoding_embedding_size = 512  # columns in the embedding matrix
decoding_embedding_size = 512  # columns in the embedding matrix
learning_rate = 0.01
learning_rate_decay = 0.9
min_learning_rate = 0.0001
keep_probability = 0.5

# Defining a session
tf.reset_default_graph()
session = tf.InteractiveSession()

# Loading the model inputs
inputs, targets, lr, keep_prob = model_inputs()

# Setting the sequence length
sequence_length = tf.placeholder_with_default(
    25, None, name='sequence_length')  # it's not going to take over 25 words in a sentence

# Getting the shape of the inputs tensor
input_shape = tf.shape(inputs)

# Getting the training and test predictions
training_predictions, test_predictions = seq2seq_model(
    tf.reverse(inputs, [-1]), targets, keep_prob, batch_size, sequence_length,
    len(answerswords2int), len(questionswords2int), encoding_embedding_size,
    decoding_embedding_size, rnn_size, num_layers, questionswords2int)

# Setting up the Loss Error, the optimizer and Gradient Clipping
with tf.name_scope("optimization"):
    loss_error = tf.contrib.seq2seq.sequence_loss(
        training_predictions, targets,
def _create_queue(self, id_list, shuffle=True, batch_size=16, num_readers=1, min_queue_examples=64, capacity=128, **kwargs): """ Builds the data queue using the '_read_sample' function Parameters ---------- id_list : list or tuple list of examples to read. This can be a list of files or a list of ids or something else the read function understands shuffle : bool flag to toggle shuffling of examples batch_size : int num_readers : int number of readers to spawn to fill the queue. this is used for multi-threading and should be tuned according to the specific problem at hand and hardware available min_queue_examples : int minimum number of examples currently in the queue. This can be tuned for more preloading of the data capacity : int maximum number of examples the queue will hold. a lower number needs less memory whereas a higher number enables better mixing of the examples kwargs : additional arguments to be passed to the reader function Returns ------- list list of tensors representing a batch from the queue """ with tf.name_scope(self.name): # Create filename_queue id_tensor = tf.convert_to_tensor(id_list, dtype=tf.string) id_queue = tf.train.slice_input_producer([id_tensor], capacity=16, shuffle=shuffle) if num_readers < 1: raise ValueError('Please make num_readers at least 1') # Build a FIFO or a shuffled queue if shuffle: examples_queue = tf.RandomShuffleQueue( capacity=capacity, min_after_dequeue=min_queue_examples, dtypes=self.dtypes) else: examples_queue = tf.FIFOQueue( capacity=capacity, dtypes=self.dtypes) if num_readers > 1: # Create multiple readers to populate the queue of examples. enqueue_ops = [] for _ in range(num_readers): ex = self._read_wrapper(id_queue, **kwargs) enqueue_ops.append(examples_queue.enqueue_many(ex)) tf.train.queue_runner.add_queue_runner(tf.train.queue_runner.QueueRunner(examples_queue, enqueue_ops)) ex_tensors = examples_queue.dequeue() ex = [] for t, s in zip(ex_tensors, self.dshapes): t.set_shape(list(s)) t = tf.expand_dims(t, 0) ex.append(t) else: # Use a single reader for population ex = self._read_wrapper(id_queue, **kwargs) # create a batch_size tensor with default shape, to keep the downstream graph flexible batch_size_tensor = tf.placeholder_with_default(batch_size, shape=[], name='batch_size_ph') # batch the read examples ex_batch = tf.train.batch( ex, batch_size=batch_size_tensor, enqueue_many=True, capacity=2 * num_readers * batch_size) return ex_batch
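# Hedged usage sketch: the batch-size placeholder built above is not returned, but since
# it has a default it can be looked up by name and overridden per run. `reader`, `sess`
# and `id_list` are illustrative names, and the tensor name assumes the enclosing name
# scope is exactly `reader.name` with no collisions.
batch = reader._create_queue(id_list, shuffle=True, batch_size=16)
bs_ph = tf.get_default_graph().get_tensor_by_name('{}/batch_size_ph:0'.format(reader.name))
small_batch = sess.run(batch, feed_dict={bs_ph: 4})  # dequeue 4 examples instead of 16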
A_odd_rows = tf.concat([_0, -rx, -ry, -_1, rx * y, ry * y, y], axis=-1) A = tf.concat([A_even_rows, A_odd_rows], axis=-1) A = tf.reshape(A, [num_batch, 2 * num_pts, 9]) _, _, V = tf.svd(A, full_matrices=True) return tf.reshape(V[:, :, -1], [num_batch, 3, 3]) if __name__ == '__main__': import cv2 from matplotlib import pyplot as plt from time import time from sys import argv im = cv2.cvtColor(cv2.imread(argv[1]), cv2.COLOR_BGR2RGB) / 255. h = im.shape[0] w = im.shape[1] x = tf.expand_dims( tf.placeholder_with_default(im.astype(np.float32), im.shape), 0) y = rand_warp(x, im.shape[:2]) y = tf.clip_by_value(y + .5, 0.0, 1.) with tf.Session() as sess: t = time() p = sess.run(y) print("Took", 1000 * (time() - t), "ms") plt.subplot(2, 1, 1) plt.imshow(im) plt.subplot(2, 1, 2) plt.imshow(np.squeeze(p)) plt.show()
def create_graph(): graph = tf.Graph() with graph.as_default(): tf.set_random_seed(2899) text = tf.placeholder(tf.float32, shape=[None, batch_size, 1]) text_mask = tf.placeholder(tf.float32, shape=[None, batch_size]) text2 = tf.placeholder(tf.float32, shape=[None, batch_size, 1]) text2_mask = tf.placeholder(tf.float32, shape=[None, batch_size]) mels = tf.placeholder(tf.float32, shape=[None, batch_size, output_size]) mel_mask = tf.placeholder(tf.float32, shape=[None, batch_size]) bias = tf.placeholder_with_default(tf.zeros(shape=[]), shape=[]) cell_dropout = tf.placeholder_with_default(cell_dropout_scale * tf.ones(shape=[]), shape=[]) prenet_dropout = tf.placeholder_with_default(0.5 * tf.ones(shape=[]), shape=[]) bn_flag = tf.placeholder_with_default(tf.zeros(shape=[]), shape=[]) att_w_init = tf.placeholder(tf.float32, shape=[batch_size, 2 * enc_units]) att_k_init = tf.placeholder(tf.float32, shape=[batch_size, window_mixtures]) att_h_init = tf.placeholder(tf.float32, shape=[batch_size, dec_units]) att_c_init = tf.placeholder(tf.float32, shape=[batch_size, dec_units]) h1_init = tf.placeholder(tf.float32, shape=[batch_size, dec_units]) c1_init = tf.placeholder(tf.float32, shape=[batch_size, dec_units]) h2_init = tf.placeholder(tf.float32, shape=[batch_size, dec_units]) c2_init = tf.placeholder(tf.float32, shape=[batch_size, dec_units]) in_mels = mels[:-1, :, :] in_mel_mask = mel_mask[:-1] out_mels = mels[1:, :, :] out_mel_mask = mel_mask[1:] projmel1 = Linear([in_mels], [output_size], prenet_units, dropout_flag_prob_keep=prenet_dropout, name="prenet1", random_state=random_state) projmel2 = Linear([projmel1], [prenet_units], prenet_units, dropout_flag_prob_keep=prenet_dropout, name="prenet2", random_state=random_state) text_char_e, t_c_emb = Embedding(text, char_vocabulary_size, emb_dim, random_state=random_state, name="text_char_emb") text_phone_e, t_p_emb = Embedding(text2, phone_vocabulary_size, emb_dim, random_state=random_state, name="text_phone_emb") conv_text = SequenceConv1dStack([text_char_e + text_phone_e], [emb_dim], n_filts, bn_flag, n_stacks=n_stacks, kernel_sizes=[(1, 1), (3, 3), (5, 5)], name="enc_conv1", random_state=random_state) # text_mask and mask_mask should be the same, doesn't matter which one we use bitext = BiLSTMLayer([conv_text], [n_filts], enc_units, input_mask=text_mask, name="encode_bidir", init=rnn_init, random_state=random_state) def step(inp_t, inp_mask_t, corr_inp_t, att_w_tm1, att_k_tm1, att_h_tm1, att_c_tm1, h1_tm1, c1_tm1, h2_tm1, c2_tm1): o = GaussianAttentionCell( [corr_inp_t], [prenet_units], (att_h_tm1, att_c_tm1), att_k_tm1, bitext, 2 * enc_units, dec_units, att_w_tm1, input_mask=inp_mask_t, conditioning_mask=text_mask, #attention_scale=1. 
/ 10., attention_scale=1., step_op="softplus", name="att", random_state=random_state, cell_dropout=1., #cell_dropout, init=rnn_init) att_w_t, att_k_t, att_phi_t, s = o att_h_t = s[0] att_c_t = s[1] output, s = LSTMCell([corr_inp_t, att_w_t, att_h_t], [prenet_units, 2 * enc_units, dec_units], h1_tm1, c1_tm1, dec_units, input_mask=inp_mask_t, random_state=random_state, cell_dropout=cell_dropout, name="rnn1", init=rnn_init) h1_t = s[0] c1_t = s[1] output, s = LSTMCell([corr_inp_t, att_w_t, h1_t], [prenet_units, 2 * enc_units, dec_units], h2_tm1, c2_tm1, dec_units, input_mask=inp_mask_t, random_state=random_state, cell_dropout=cell_dropout, name="rnn2", init=rnn_init) h2_t = s[0] c2_t = s[1] return output, att_w_t, att_k_t, att_phi_t, att_h_t, att_c_t, h1_t, c1_t, h2_t, c2_t r = scan(step, [in_mels, in_mel_mask, projmel2], [ None, att_w_init, att_k_init, None, att_h_init, att_c_init, h1_init, c1_init, h2_init, c2_init ]) output = r[0] att_w = r[1] att_k = r[2] att_phi = r[3] att_h = r[4] att_c = r[5] h1 = r[6] c1 = r[7] h2 = r[8] c2 = r[9] pred = Linear([output], [dec_units], output_size, name="out_proj", random_state=random_state) """ mix, means, lins = DiscreteMixtureOfLogistics([proj], [output_size], n_output_channels=1, name="dml", random_state=random_state) cc = DiscreteMixtureOfLogisticsCost(mix, means, lins, out_mels, 256) """ # correct masking cc = (pred - out_mels)**2 #cc = out_mel_mask[..., None] * cc #loss = tf.reduce_sum(tf.reduce_sum(cc, axis=-1)) / tf.reduce_sum(out_mel_mask) loss = tf.reduce_mean(tf.reduce_sum(cc, axis=-1)) learning_rate = 0.0001 #steps = tf.Variable(0.) #learning_rate = tf.train.exponential_decay(0.001, steps, staircase=True, # decay_steps=50000, decay_rate=0.5) optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, use_locking=True) grad, var = zip(*optimizer.compute_gradients(loss)) grad, _ = tf.clip_by_global_norm(grad, 10.) #train_step = optimizer.apply_gradients(zip(grad, var), global_step=steps) train_step = optimizer.apply_gradients(zip(grad, var)) things_names = [ "mels", "mel_mask", "in_mels", "in_mel_mask", "out_mels", "out_mel_mask", "text", "text_mask", "text2", "text2_mask", "bias", "cell_dropout", "prenet_dropout", "bn_flag", "pred", #"mix", "means", "lins", "att_w_init", "att_k_init", "att_h_init", "att_c_init", "h1_init", "c1_init", "h2_init", "c2_init", "att_w", "att_k", "att_phi", "att_h", "att_c", "h1", "c1", "h2", "c2", "loss", "train_step", "learning_rate" ] things_tf = [eval(name) for name in things_names] for tn, tt in zip(things_names, things_tf): graph.add_to_collection(tn, tt) train_model = namedtuple('Model', things_names)(*things_tf) return graph, train_model
mnist = input_data.read_data_sets("/tmp/data/") n_inputs = 28 * 28 n_hidden1 = 300 n_hidden2 = 100 n_outputs = 10 if 'session' in locals() and session is not None: print('Close interactive session') session.close() tf.reset_default_graph() X = tf.placeholder(tf.float32, shape=(None, n_inputs), name='X') y = tf.placeholder(tf.int32, shape=None, name='y') training = tf.placeholder_with_default(False, shape=(), name='training') batch_norm_momentum = 0.9 with tf.name_scope('dnn'): he_init = tf.contrib.layers.variance_scaling_initializer() my_batch_norm_layer = partial(tf.layers.batch_normalization, training=training, momentum=batch_norm_momentum) my_dense_layer = partial(tf.layers.dense, kernel_initializer=he_init) hidden1 = my_dense_layer(X, n_hidden1, name='hidden1') bn1 = tf.nn.elu(my_batch_norm_layer(hidden1)) hidden2 = my_dense_layer(bn1, n_hidden2, name='hidden2')
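# Hedged training note: layers created with tf.layers.batch_normalization register their
# moving-average updates in the UPDATE_OPS collection, so the train op should depend on
# them, and the `training` placeholder must be fed True during training (it defaults to
# False for evaluation). `loss`, `X_batch` and `y_batch` are illustrative names here.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    training_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
# sess.run(training_op, feed_dict={X: X_batch, y: y_batch, training: True})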
f.write(pack("s", b"\n")) np.random.seed(20180825) os.environ["CUDA_VISIBLE_DEVICES"] = "0" # use gpu 0 clone_id = "1" dimin, dimout = 28, 7 graph = tf.Graph() with graph.as_default(): x0 = tf.placeholder(tf.float32, [None, dimin * 2], name="x0") x1 = tf.placeholder(tf.float32, [None, dimin * 2], name="x1") y = tf.placeholder(tf.float32, [None, 1], name="y") w = tf.placeholder(tf.float32, [None, 1], name="w") training = tf.placeholder_with_default(False, [], name="training") with tf.variable_scope("clone%s" % clone_id): # load clone1 to model model = buildModel(x0, x1, y, w, dimout, 4, 512, training=training) class Encoder(object): def __init__(self, tf_sess_config=None): with graph.as_default(): asset_path("encoder.index") asset_path("encoder.meta") p = asset_path("encoder.data-00000-of-00001") p = os.path.join(os.path.dirname(p), "encoder") self.sess = tf.Session(config=tf_sess_config) variables = slim.get_variables_to_restore() saver = tf.train.Saver(variables)
label_images = dataset_dict['label_images'] path_list = dataset_dict['path_list'] # You should use your own dataset to construct the placeholder. placeholders = { 'support': [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)], 'features': tf.placeholder(tf.float32, shape=(features.shape[0], features.shape[1])), # sparse_placeholder 'labels': tf.placeholder(tf.float32, shape=(None, y_train.shape[1])), 'labels_mask': tf.placeholder(tf.int32), 'dropout': tf.placeholder_with_default(0.5, shape=()), 'num_features_nonzero': tf.placeholder(tf.int32), # helper variable for sparse dropout 'learning_rate': tf.placeholder(tf.float32, shape=()) } # Create model, features is the input w2v. model = model_func(placeholders, input_dim=features.shape[1], logging=True, layer_structure=FLAGS.layer_structure, layer_types=FLAGS.layer_types, layer=FLAGS.layer)
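# Hedged usage sketch: a typical feed for the placeholder dict above. 'dropout' already
# defaults to 0.5, so it only needs to be overridden (e.g. set to 0.) at evaluation time.
# `support` (a list of sparse tuples), `train_mask` and `learning_rate_value` are
# illustrative names, and the value fed for 'num_features_nonzero' is only a stand-in
# for the sparse-dropout helper.
feed_dict = {placeholders['features']: features,
             placeholders['labels']: y_train,
             placeholders['labels_mask']: train_mask,
             placeholders['num_features_nonzero']: 0,
             placeholders['learning_rate']: learning_rate_value}
feed_dict.update({placeholders['support'][i]: support[i] for i in range(num_supports)})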
def fit(self, data_z, data_p, data_y): ''' Fits the treatment response model. Parameters data_z: (n x d np array) of instruments data_p: (n x p np array) of treatments data_y: (n x 1 np array) of outcomes ''' num_instruments = data_z.shape[1] num_treatments = data_p.shape[1] num_outcomes = data_y.shape[1] self.num_treatments = num_treatments # Data iterators for critics/modeler and for meta-critic data_it = LoopIterator( np.arange(data_z.shape[0]), self._batch_size_modeler, random=True) data_it_hedge = LoopIterator( np.arange(data_z.shape[0]), data_z.shape[0], random=self._bootstrap_hedge) # Creat a test grid for calculating loss at intervals test_min = np.percentile(data_p, 5) test_max = np.percentile(data_p, 95) self.test_grid = np.linspace(test_min, test_max, 100) # Create the clusterings of the data that define the critics cluster_labels, cluster_ids = self._data_clusterings(data_z, data_p, data_y) if self._critic_type == 'Gaussian': # We put a symmetric gaussian encompassing all the data points of each cluster of each clustering center_grid = [] precision_grid = [] normalizers = [] for tree in range(cluster_labels.shape[1]): for leaf in cluster_ids[tree]: center = np.mean(data_z[cluster_labels[:, tree].flatten()==leaf, :], axis=0) distance = np.linalg.norm(data_z - center, axis=1) / data_z.shape[1] precision = 1./(np.sqrt(2)*(np.sort(distance)[self._min_cluster_size])) center_grid.append(center) precision_grid.append(precision) normalizers.append((precision**num_instruments) * np.sum(np.exp(- (precision * distance)**2 )) / (np.power(2. * np.pi, num_instruments / 2.))) normalizers = np.ones(len(center_grid)) #np.array(normalizers) center_grid = np.array(center_grid) precision_grid = np.array(precision_grid) if self._critics_precision is not None: precision_grid = self._critics_precision*np.ones(precision_grid.shape) #print(np.sort(center_grid[:, 0].flatten())) #print(precision_grid[np.argsort(center_grid[:, 0].flatten())]) else: # We put a uniform kernel only on the data points of each cluster of each clustering normalizers = [] center_grid = [] leaf_id_list = [] for tree in range(cluster_labels.shape[1]): for leaf in cluster_ids[tree]: center_grid.append(np.mean(data_z[cluster_labels[:, tree].flatten()==leaf, :], axis=0)) # used only for tensorflow summary normalizers.append(np.sum(cluster_labels[:, tree].flatten()==leaf)) leaf_id_list.append((tree, leaf)) center_grid = np.array(center_grid) #print(np.sort(center_grid[:, 0].flatten())) #print(np.array(normalizers)[np.argsort(center_grid[:, 0].flatten())]) normalizers = np.ones(len(center_grid)) #np.array(normalizers) leaf_id_list = np.array(leaf_id_list) # tf Graph input if self._random_seed is not None: tf.set_random_seed(self._random_seed) self.Z = tf.placeholder("float", [None, num_instruments], name="instrument") self.P = tf.placeholder("float", [None, num_treatments], name="treatment") self.Y = tf.placeholder("float", [None, num_outcomes], name="outcome") self.Leaf = tf.placeholder("float", [None, cluster_labels.shape[1]], name="leaf_id") self.drop_prob = tf.placeholder_with_default( 1.0, shape=(), name="drop_prob") self.gmm_graph = GMMGameGraph(self.Z, self.P, self.Y, self.Leaf, self.drop_prob, eta_hedge=self._eta_hedge, loss_clip_hedge=self._loss_clip_hedge, learning_rate_modeler=self._learning_rate_modeler, learning_rate_critics=self._learning_rate_critics, critics_jitter=self._critics_jitter, critic_type=self._critic_type, l1_reg_weight_modeler=self._l1_reg_weight_modeler, l2_reg_weight_modeler=self._l2_reg_weight_modeler, 
dnn_layers=self._dnn_layers, dnn_poly_degree=self._dnn_poly_degree, dissimilarity_eta=self._dissimilarity_eta) if self._critic_type == 'Gaussian': self.gmm_graph.create_graph(normalizers=normalizers, center_grid=center_grid, precision_grid=precision_grid) else: self.gmm_graph.create_graph(normalizers=normalizers, leaf_list=leaf_id_list) # Initialize the variables (i.e. assign their default value) init = tf.global_variables_initializer() if num_treatments == 1: self.avg_fn = [] self.final_fn = [] self.best_fn = [] else: saver = tf.train.Saver(scope_variables("Modeler"), max_to_keep=self._num_steps) #print(scope_variables("Modeler")) avg_store_steps = list(np.random.choice(np.arange(int(0.2 * self._num_steps), self._num_steps), int(0.4 * self._num_steps), replace=False)) #print(avg_store_steps) # Start training loss = np.inf with tf.Session() as sess: if self._log_summary: merged = tf.summary.merge_all() writer = tf.summary.FileWriter(self._summary_dir, sess.graph) # Run the initializer sess.run(init) d1 = d2 = d3 = d4 = d5 = d6 = 0. for step in range(1, self._num_steps + 1): t1 = datetime.now() # Modeler for inner_step in range(self._train_ratio[0]): inds = data_it.get_next() y1, p1, z1, leaf1 = data_y[inds], data_p[inds], data_z[inds], cluster_labels[inds] inds = data_it.get_next() y2, p2, z2, leaf2 = data_y[inds], data_p[inds], data_z[inds], cluster_labels[inds] sess.run(self.gmm_graph.update_prev_moments, feed_dict={ self.Z: z1, self.P: p1, self.Y: y1, self.Leaf: leaf1, self.drop_prob: .9}) sess.run(self.gmm_graph.gradient_step_modeler, feed_dict={ self.Z: z2, self.P: p2, self.Y: y2, self.Leaf: leaf2, self.drop_prob: .9}) t2 = datetime.now() d1 += (t2 - t1).seconds + (t2 - t1).microseconds * 1E-6 if DEBUG: new_loss = sess.run(self.gmm_graph.max_violation, feed_dict={ self.Z: data_z, self.P: data_p, self.Y: data_y, self.Leaf: cluster_labels}) print("After modeler: Step " + str(step) + ", Moment violation= " + "{:.10f}".format(new_loss)) print([sess.run([crt.precision, crt.weights, crt._normalized_translation, crt.center, crt.output[0]], feed_dict={ self.Z: data_z, self.P: data_p, self.Y: data_y, self.Leaf: cluster_labels}) for crt in self.gmm_graph.critics]) print(sess.run([cw.value() for cw in self.gmm_graph.critic_weights])) # Critics for inner_step in range(self._train_ratio[1]): inds = data_it.get_next() y1, p1, z1, leaf1 = data_y[inds], data_p[inds], data_z[inds], cluster_labels[inds] inds = data_it.get_next() y2, p2, z2, leaf2 = data_y[inds], data_p[inds], data_z[inds], cluster_labels[inds] sess.run(self.gmm_graph.update_prev_moments, feed_dict={ self.Z: z1, self.P: p1, self.Y: y1, self.Leaf: leaf1, self.drop_prob: .9}) sess.run(self.gmm_graph.gradient_step_critics, feed_dict={ self.Z: z2, self.P: p2, self.Y: y2, self.Leaf: leaf2, self.drop_prob: .9}) if DEBUG: new_loss = sess.run(self.gmm_graph.max_violation, feed_dict={ self.Z: data_z, self.P: data_p, self.Y: data_y, self.Leaf: cluster_labels}) print("After Critic Step " + str(step) + ", Moment violation= " + "{:.10f}".format(new_loss)) print([sess.run([crt.precision, crt.weights, crt._normalized_translation, crt.center, crt.output[0]], feed_dict={ self.Z: data_z, self.P: data_p, self.Y: data_y, self.Leaf: cluster_labels}) for crt in self.gmm_graph.critics]) print([sess.run(cw.value()) for cw in self.gmm_graph.critic_weights]) t3 = datetime.now() d2 += (t3 - t2).seconds + (t3 - t2).microseconds * 1E-6 # Meta-Critic if step % self._hedge_step == 0: inds = data_it_hedge.get_next() y1, p1, z1, leaf1 = data_y[inds], data_p[inds], 
data_z[inds], cluster_labels[inds] sess.run(self.gmm_graph.gradient_step_meta_critic, feed_dict={ self.Z: z1, self.P: p1, self.Y: y1, self.Leaf: leaf1}) if DEBUG: new_loss = sess.run(self.gmm_graph.max_violation, feed_dict={ self.Z: data_z, self.P: data_p, self.Y: data_y, self.Leaf: cluster_labels}) print("After Meta-Critic Step " + str(step) + ", Moment violation= " + "{:.10f}".format(new_loss)) print([sess.run([crt.precision, crt.weights, crt._normalized_translation, crt.center, crt.output[0]], feed_dict={ self.Z: data_z, self.P: data_p, self.Y: data_y, self.Leaf: cluster_labels}) for crt in self.gmm_graph.critics]) print([sess.run(cw.value()) for cw in self.gmm_graph.critic_weights]) t4 = datetime.now() d3 += (t4 - t3).seconds + (t4 - t3).microseconds * 1E-6 if step % self._check_loss_step == 0 or step == 1 or step == self._num_steps: # Calculate batch loss and accuracy new_loss = sess.run(self.gmm_graph.max_violation, feed_dict={ self.Z: data_z, self.P: data_p, self.Y: data_y, self.Leaf: cluster_labels}) if new_loss <= loss: if num_treatments == 1: self.best_fn = sess.run(self.gmm_graph.modeler.output, feed_dict={self.P:self.test_grid.reshape(-1,1)}).flatten() else: saver.save(sess, "./tmp/model_best.ckpt") loss = new_loss t5 = datetime.now() d4 += (t5 - t4).seconds + (t5 - t4).microseconds * 1E-6 if self._log_summary and step % self._store_step == 0: summary = sess.run(merged, feed_dict={ self.Z: data_z, self.P: data_p, self.Y: data_y, self.Leaf: cluster_labels}) writer.add_summary(summary, step) log_function(writer, 'CriticWeights', center_grid, np.array([sess.run(cw.value()) for cw in self.gmm_graph.critic_weights]), step, agg='sum') #log_function(writer, 'CriticPrecisions', center_grid, np.array([sess.run(cr.precision.value()) for cr in self.gmm_graph.critics]), step, agg='mean') t6 = datetime.now() d5 += (t6 - t5).seconds + (t6 - t5).microseconds * 1E-6 if step in avg_store_steps: #step > .2 * self._num_steps: if num_treatments == 1: self.avg_fn.append(sess.run(self.gmm_graph.modeler.output, feed_dict={self.P:self.test_grid.reshape(-1,1)}).flatten()) else: saver.save(sess, "./tmp/model_{}.ckpt".format(step)) self._checkpoints.append(step) t7 = datetime.now() d6 += (t7 - t6).seconds + (t7 - t6).microseconds * 1E-6 if step % self._display_step == 0: new_loss = sess.run(self.gmm_graph.max_violation, feed_dict={ self.Z: data_z, self.P: data_p, self.Y: data_y, self.Leaf: cluster_labels}) print("Final Step " + str(step) + ", Moment violation= " + "{:.10f}".format(new_loss)) print("Modeler train time: {:.2f}".format(d1)) print("Critic train time: {:.2f}".format(d2)) print("Meta-critic train time: {:.2f}".format(d3)) print("Best loss checking time: {:.2f}".format(d4)) print("Summary storing time: {:.2f}".format(d5)) print("Average model calculation time: {:.2f}".format(d6)) print("Optimization Finished!") if num_treatments == 1: self.final_fn = sess.run(self.gmm_graph.modeler.output, feed_dict={self.P:self.test_grid.reshape(-1,1)}).flatten() else: saver.save(sess, "./tmp/model_final.ckpt") sess.close() if self._log_summary: writer.close()
def deepnn(x): def conv3d(x, W): return tf.nn.conv3d(x, W, strides=[1, 1, 1, 1, 1], padding='VALID') def max_pool(x): return tf.nn.max_pool3d(x, ksize=[1, 3, 3, 1, 1], strides=[1, 3, 3, 1, 1], padding='VALID') def weight_variable(shape): initial = tf.truncated_normal(shape, stddev=0.1) return tf.Variable(initial) def bias_variable(shape): initial = tf.constant(0.1, shape=shape) return tf.Variable(initial) with tf.name_scope('reshape'): x_image = tf.reshape(x, [-1, img_shape, img_shape, num_images, 1]) # print("After Reshape Layer") # print (x_image.shape) with tf.name_scope('conv1'): W_conv1 = weight_variable([3, 3, 5, 1, 32]) b_conv1 = bias_variable([32]) h_conv1 = tf.nn.relu(conv3d(x_image, W_conv1) + b_conv1) # print("After First Conv Layer") # print (h_conv1.shape) with tf.name_scope('pool1'): h_pool1 = max_pool(h_conv1) # print("After First pool Layer") # print (h_pool1.shape) keep_prob = tf.placeholder_with_default(1.0, shape=()) dropout1 = tf.nn.dropout(h_pool1, keep_prob) with tf.name_scope('conv2'): W_conv2 = weight_variable([3, 3, 1, 32, 32]) b_conv2 = bias_variable([32]) h_conv2 = tf.nn.relu(conv3d(dropout1, W_conv2) + b_conv2) # print("After Second conv Layer") # print (h_conv2.shape) with tf.name_scope('pool2'): h_pool2 = max_pool(h_conv2) # print ("After Second pool Layer") # print (h_pool2.shape) dropout2 = tf.nn.dropout(h_pool2, keep_prob) with tf.name_scope('fc1'): W_fc1 = weight_variable([1*1*1*32, 64]) b_fc1 = bias_variable([64]) h_pool2_flat = tf.reshape(dropout2, [-1, 1*1*1*32]) h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1) with tf.name_scope('dropout'): # keep_prob = tf.placeholder(tf.float32) # h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob) h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob) # print ("After fully connected layer") # print (h_fc1.shape) with tf.name_scope('fc2'): W_fc2 = weight_variable([64, 64]) b_fc2 = bias_variable([64]) h_fc2 = tf.nn.relu(tf.matmul(h_fc1_drop, W_fc2) + b_fc2) print (h_fc1_drop.shape) with tf.name_scope('fc3'): W_fc3 = weight_variable([64, 2]) b_fc3 = bias_variable([2]) y_conv = tf.nn.relu(tf.matmul(h_fc2, W_fc3) + b_fc3) # with tf.name_scope('fc2'): # W_fc2 = weight_variable([64, 2]) # b_fc2 = bias_variable([2]) # y_conv = tf.nn.relu(tf.matmul(h_fc1_drop, W_fc2) + b_fc2) # print ("After second fully connected layer") # print (y_conv.shape) return y_conv, keep_prob
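# Hedged usage sketch: deepnn returns the keep_prob placeholder so dropout can be enabled
# only while training; at evaluation time it can simply be left at its default of 1.0.
# `x`, `y_`, `batch` and `sess` are illustrative names, not part of the original snippet.
y_conv, keep_prob = deepnn(x)
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_, logits=y_conv))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
sess.run(train_step, feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})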
def __init__(self, model, data, trainer_agr, optimizer, lr_initial, batch_size, min_num_iter, max_num_iter, num_iter_after_best_val, max_num_iter_cotrain, reg_weight_ll, reg_weight_lu, reg_weight_uu, num_pairs_reg, iter_cotrain, reg_weight_vat=0.0, use_ent_min=False, enable_summaries=False, summary_step=1, summary_dir=None, warm_start=False, gradient_clip=None, logging_step=1, eval_step=1, abs_loss_chg_tol=1e-10, rel_loss_chg_tol=1e-7, loss_chg_iter_below_tol=30, checkpoints_dir=None, weight_decay=None, weight_decay_schedule=None, penalize_neg_agr=False, first_iter_original=True, use_l2_classif=True, seed=None, lr_decay_steps=None, lr_decay_rate=None, use_graph=False): super(TrainerClassificationGCN, self).__init__( model=model, abs_loss_chg_tol=abs_loss_chg_tol, rel_loss_chg_tol=rel_loss_chg_tol, loss_chg_iter_below_tol=loss_chg_iter_below_tol) self.data = data self.trainer_agr = trainer_agr self.batch_size = batch_size self.min_num_iter = min_num_iter self.max_num_iter = max_num_iter self.num_iter_after_best_val = num_iter_after_best_val self.max_num_iter_cotrain = max_num_iter_cotrain self.enable_summaries = enable_summaries self.summary_step = summary_step self.summary_dir = summary_dir self.warm_start = warm_start self.gradient_clip = gradient_clip self.logging_step = logging_step self.eval_step = eval_step self.checkpoint_path = ( os.path.join(checkpoints_dir, 'classif_best.ckpt') if checkpoints_dir is not None else None) self.weight_decay_initial = weight_decay self.weight_decay_schedule = weight_decay_schedule self.num_pairs_reg = num_pairs_reg self.reg_weight_ll = reg_weight_ll self.reg_weight_lu = reg_weight_lu self.reg_weight_uu = reg_weight_uu self.reg_weight_vat = reg_weight_vat self.use_ent_min = use_ent_min self.penalize_neg_agr = penalize_neg_agr self.use_l2_classif = use_l2_classif self.first_iter_original = first_iter_original self.iter_cotrain = iter_cotrain self.lr_initial = lr_initial self.lr_decay_steps = lr_decay_steps self.lr_decay_rate = lr_decay_rate self.use_graph = use_graph # Build TensorFlow graph. logging.info('Building classification TensorFlow graph...') # Create placeholders. input_indices = tf.placeholder( tf.int64, shape=(None,), name='input_indices') input_indices_unlabeled = tf.placeholder( tf.int32, shape=(None,), name='input_indices_unlabeled') input_labels = tf.placeholder(tf.int64, shape=(None,), name='input_labels') # Create a placeholder specifying if this is train time. is_train = tf.placeholder_with_default(False, shape=[], name='is_train') # Create some placeholders specific to GCN. self.support_op = tf.sparse_placeholder(tf.float32, name='support') self.features_op = tf.sparse_placeholder(tf.float32, name='features') self.num_features_nonzero_op = tf.placeholder( tf.int32, name='num_features_nonzero') # Save the data required to fill in these placeholders. We don't add them # directly in the graph as constants in order to avoid saving large # checkpoints. self.support = data.support self.features = data.dataset.features_sparse self.num_features_nonzero = data.num_features_nonzero # Create variables and predictions. 
with tf.variable_scope('predictions'): encoding, variables_enc, reg_params_enc = ( self.model.get_encoding_and_params( inputs=self.features_op, is_train=is_train, support=self.support_op, num_features_nonzero=self.num_features_nonzero_op)) self.variables = variables_enc self.reg_params = reg_params_enc predictions, variables_pred, reg_params_pred = ( self.model.get_predictions_and_params( encoding=encoding, is_train=is_train, support=self.support_op, num_features_nonzero=self.num_features_nonzero_op)) self.variables.update(variables_pred) self.reg_params.update(reg_params_pred) normalized_predictions = self.model.normalize_predictions(predictions) predictions_var_scope = tf.get_variable_scope() predictions_batch = tf.gather(predictions, input_indices, axis=0) normalized_predictions_batch = tf.gather( normalized_predictions, input_indices, axis=0) one_hot_labels = tf.one_hot( input_labels, data.num_classes, name='targets_one_hot') # Create a variable for weight decay that may be updated. weight_decay_var, weight_decay_update = self._create_weight_decay_var( weight_decay, weight_decay_schedule) # Create counter for classification iterations. iter_cls_total, iter_cls_total_update = self._create_counter() # Create loss. with tf.name_scope('loss'): if self.use_l2_classif: loss_supervised = tf.square(one_hot_labels - normalized_predictions_batch) loss_supervised = tf.reduce_sum(loss_supervised, axis=-1) loss_supervised = tf.reduce_mean(loss_supervised) else: loss_supervised = self.model.get_loss( predictions=predictions_batch, targets=one_hot_labels, weight_decay=None) # Agreement regularization loss. loss_agr = self._get_agreement_reg_loss(data, is_train) # If the first co-train iteration trains the original model (for # comparison purposes), then we do not add an agreement loss. if self.first_iter_original: loss_agr_weight = tf.cast(tf.greater(iter_cotrain, 0), tf.float32) loss_agr = loss_agr * loss_agr_weight # Weight decay loss. loss_reg = 0.0 if weight_decay_var is not None: for var in self.reg_params.values(): loss_reg += weight_decay_var * tf.nn.l2_loss(var) # Adversarial loss, in case we want to add VAT on top of GAM. ones = tf.fill(tf.shape(input_indices_unlabeled), 1.0) unlabeled_mask = tf.scatter_nd( input_indices_unlabeled[:, None], updates=ones, shape=[ data.num_samples, ], name='unlabeled_mask') placeholders = { 'support': self.support_op, 'num_features_nonzero': self.num_features_nonzero_op } loss_vat = get_loss_vat( inputs=self.features_op, predictions=predictions, mask=unlabeled_mask, is_train=is_train, model=model, placeholders=placeholders, predictions_var_scope=predictions_var_scope) num_unlabeled = tf.shape(input_indices_unlabeled)[0] loss_vat = tf.cond( tf.greater(num_unlabeled, 0), lambda: loss_vat, lambda: 0.0) if self.use_ent_min: # Use entropy minimization with VAT (i.e. VATENT). loss_ent = entropy_y_x(predictions, unlabeled_mask) loss_vat = loss_vat + tf.cond( tf.greater(num_unlabeled, 0), lambda: loss_ent, lambda: 0.0) loss_vat = loss_vat * self.reg_weight_vat if self.first_iter_original: # Do not add the adversarial loss in the first iteration if # the first iteration trains the plain baseline model. weight_loss_vat = tf.cond( tf.greater(iter_cotrain, 0), lambda: 1.0, lambda: 0.0) loss_vat = loss_vat * weight_loss_vat # Total loss. loss_op = loss_supervised + loss_agr + loss_reg + loss_vat # Create accuracy. 
accuracy = tf.equal( tf.argmax(normalized_predictions_batch, 1), input_labels) accuracy = tf.reduce_mean(tf.cast(accuracy, tf.float32)) # Create Tensorboard summaries. if self.enable_summaries: summaries = [ tf.summary.scalar('loss_supervised', loss_supervised), tf.summary.scalar('loss_agr', loss_agr), tf.summary.scalar('loss_reg', loss_reg), tf.summary.scalar('loss_total', loss_op) ] self.summary_op = tf.summary.merge(summaries) # Create learning rate schedule and optimizer. self.global_step = tf.train.get_or_create_global_step() if self.lr_decay_steps is not None and self.lr_decay_rate is not None: self.lr = tf.train.exponential_decay( self.lr_initial, self.global_step, self.lr_decay_steps, self.lr_decay_rate, staircase=True) self.optimizer = optimizer(self.lr) else: self.optimizer = optimizer(lr_initial) # Get trainable variables and compute gradients. grads_and_vars = self.optimizer.compute_gradients( loss_op, tf.trainable_variables(scope=tf.get_default_graph().get_name_scope())) # Clip gradients. if self.gradient_clip: variab = [elem[1] for elem in grads_and_vars] gradients = [elem[0] for elem in grads_and_vars] gradients, _ = tf.clip_by_global_norm(gradients, self.gradient_clip) grads_and_vars = tuple(zip(gradients, variab)) with tf.control_dependencies( tf.get_collection( tf.GraphKeys.UPDATE_OPS, scope=tf.get_default_graph().get_name_scope())): train_op = self.optimizer.apply_gradients( grads_and_vars, global_step=self.global_step) # Create a saver for model variables. trainable_vars = [v for _, v in grads_and_vars] # Put together the subset of variables to save and restore from the best # validation accuracy as we train the agreement model in one cotrain round. vars_to_save = trainable_vars + [] if isinstance(weight_decay_var, tf.Variable): vars_to_save.append(weight_decay_var) saver = tf.train.Saver(vars_to_save) # Put together all variables that need to be saved in case the process is # interrupted and needs to be restarted. self.vars_to_save = [iter_cls_total, self.global_step] if isinstance(weight_decay_var, tf.Variable): self.vars_to_save.append(weight_decay_var) if self.warm_start: self.vars_to_save.extend([v for v in self.variables]) # More variables to be initialized after the session is created. self.is_initialized = False self.rng = np.random.RandomState(seed) self.input_indices = input_indices self.input_indices_unlabeled = input_indices_unlabeled self.input_labels = input_labels self.predictions = predictions self.normalized_predictions = normalized_predictions self.normalized_predictions_batch = normalized_predictions_batch self.weight_decay_var = weight_decay_var self.weight_decay_update = weight_decay_update self.iter_cls_total = iter_cls_total self.iter_cls_total_update = iter_cls_total_update self.accuracy = accuracy self.train_op = train_op self.loss_op = loss_op self.saver = saver self.batch_size_actual = tf.shape(self.predictions)[0] self.reset_optimizer = tf.variables_initializer(self.optimizer.variables()) self.is_train = is_train
def mask_leading_dimension(tensor): return tf.placeholder_with_default(tensor, [None] + tensor.get_shape().as_list()[1:])
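# Minimal usage sketch for mask_leading_dimension (assumes TF 1.x): wrapping a tensor in
# placeholder_with_default keeps its value as the default while relaxing the leading
# (batch) dimension, so batches of any size can still be fed at run time.
import numpy as np
import tensorflow as tf

default_batch = tf.zeros([4, 3])                 # default value with batch size 4
masked = mask_leading_dimension(default_batch)   # static shape becomes (?, 3)
with tf.Session() as sess:
    print(sess.run(masked).shape)                                          # (4, 3) -- default used
    print(sess.run(masked, {masked: np.ones((7, 3), np.float32)}).shape)   # (7, 3) -- fed batch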
def __init__(self, params): # Input variables (`infer` is assumed to be a module-level flag: 1 = single-step inference/generation) if infer == 1: batch_size = 1 gen_length = 1 else: batch_size = params.batch_size gen_length = params.gen_length # print 'Batch_size: %d' % params.batch_size self.dropout_keep = tf.placeholder_with_default(tf.constant(1.0), shape=None, name='dropout_keep') self.lr = tf.placeholder_with_default(tf.constant(0.01), shape=None, name='learning_rate') self.x_word = tf.placeholder(tf.int32, shape=(None, params.turn_num * params.utc_length), name='x_word') self.x_api = tf.placeholder(tf.float32, shape=(None, 3), name='x_api') self.y_word_in = tf.placeholder(tf.int32, shape=(None, params.utc_length), name='y_word_in') self.y_word_out = tf.placeholder(tf.int32, shape=(None, params.utc_length), name='y_word_out') # Act labels (assumed: one-hot vectors over params.act_size), consumed by the softmax cross-entropy loss below self.y_act = tf.placeholder(tf.float32, shape=(None, params.act_size), name='y_act') # Word embedding x_embedding = tf.get_variable( name='x_embedding', shape=[params.vocab_size + 1, params.embed_size]) x_word_embedded = tf.nn.embedding_lookup(x_embedding, self.x_word, name='x_word_embedded') # Extend x_api to concat with y_word_embedded def single_cell(state_size): # define the cell of LSTM return tf.contrib.rnn.BasicLSTMCell(state_size) # Encoder self.encoder_multi_cell = tf.contrib.rnn.MultiRNNCell([ single_cell(params.state_size) for _ in range(params.layer_num) ]) # multi-layer self.encoder_initial_state = self.encoder_multi_cell.zero_state( batch_size, tf.float32) # init state of LSTM, sized with the mode-dependent batch_size self.encoder_outputs, self.encoder_last_state = tf.nn.dynamic_rnn( self.encoder_multi_cell, x_word_embedded, initial_state=self.encoder_initial_state, scope='encoder') # Use encoder_last_state as feature (not as initial_state) feature = self.encoder_last_state[0][ 1] # Use state h [1] (c [0]) as the feature feature = tf.concat([feature, self.x_api], 1) self.drop = tf.nn.dropout(feature, self.dropout_keep) # Fully connected layer fc_shape = [params.state_size + 3, params.fc_size] W_fc = tf.Variable(tf.truncated_normal(fc_shape, stddev=0.1), name='W_fc') b_fc = tf.Variable(tf.constant(0.0, shape=[params.fc_size]), name='b_fc') l2_loss = tf.nn.l2_loss(W_fc) + tf.nn.l2_loss(b_fc) self.fc = tf.nn.xw_plus_b(self.drop, W_fc, b_fc, name='fc1') self.fc1 = tf.nn.relu(self.fc, name='fc') # Softmax - act act_shape = [params.fc_size, params.act_size] W_act = tf.Variable(tf.truncated_normal(act_shape, stddev=0.1), name='W_act') b_act = tf.Variable(tf.constant(0.0, shape=[params.act_size]), name='b_act') # Score & Sigmoid self.score = tf.nn.xw_plus_b(self.fc1, W_act, b_act, name='score') self.prob = tf.nn.softmax(self.score, name='prob') loss = tf.nn.softmax_cross_entropy_with_logits(logits=self.score, labels=self.y_act) self.loss = tf.reduce_mean(loss) + params.l2_reg * l2_loss self.train_step = tf.train.AdamOptimizer( params.learning_rate).minimize(self.loss)
def train( self, training_data: "TrainingData", cfg: Optional["RasaNLUModelConfig"] = None, **kwargs: Any, ) -> None: """Train the embedding intent classifier on a data set.""" logger.debug("Started training embedding classifier.") # set numpy random seed np.random.seed(self.random_seed) session_data = self.preprocess_train_data(training_data) possible_to_train = self._check_enough_labels(session_data) if not possible_to_train: logger.error("Can not train a classifier. " "Need at least 2 different classes. " "Skipping training of classifier.") return if self.evaluate_on_num_examples: session_data, eval_session_data = train_utils.train_val_split( session_data, self.evaluate_on_num_examples, self.random_seed, label_key="label_ids", ) else: eval_session_data = None self.graph = tf.Graph() with self.graph.as_default(): # set random seed tf.set_random_seed(self.random_seed) # allows increasing batch size batch_size_in = tf.placeholder(tf.int64) ( self._iterator, train_init_op, eval_init_op, ) = train_utils.create_iterator_init_datasets( session_data, eval_session_data, batch_size_in, self.batch_in_strategy, label_key="label_ids", ) self._is_training = tf.placeholder_with_default(False, shape=()) loss, acc = self._build_tf_train_graph(session_data) # define which optimizer to use self._train_op = tf.train.AdamOptimizer().minimize(loss) # train tensorflow graph self.session = tf.Session(config=self._tf_config) train_utils.train_tf_dataset( train_init_op, eval_init_op, batch_size_in, loss, acc, self._train_op, self.session, self._is_training, self.epochs, self.batch_in_size, self.evaluate_on_num_examples, self.evaluate_every_num_epochs, ) # rebuild the graph for prediction self.pred_confidence = self._build_tf_pred_graph(session_data)
def main(results_dir='results/sho/test', trials=20, learning_rate=1e-3, reg_weight=1e-3, timesteps=25, batch_size=128, n_epochs1=10001, n_epochs2=10001): # Hyperparameters summary_step = 1000 primitive_funcs = [ *[functions.Constant()] * 2, *[functions.Identity()] * 4, *[functions.Square()] * 4, *[functions.Sin()] * 2, *[functions.Exp()] * 2, *[functions.Sigmoid()] * 2, *[functions.Product(norm=0.1)] * 2, ] # Import parabola data data = np.load('dataset/sho.npz') x_d = np.asarray(data["x_d"]) x_v = np.asarray(data["x_v"]) y_d = np.asarray(data["y_d"]) y_v = np.asarray(data["y_v"]) omega2_data = data["omega2"] N = data["N"] # Prepare data x = np.stack((x_d, x_v), axis=2) # Shape (N, NT, 2) y0 = np.stack( (y_d[:, 0], y_v[:, 0]), axis=1) # Initial conditions for prediction y, fed into propagator y_data = np.stack((y_d[:, 1:timesteps + 1], y_v[:, 1:timesteps + 1]), axis=2) # shape(NG, timesteps, 2) z_data = omega2_data[:, np.newaxis] # Tensorflow placeholders for x, y0, y x_input = tf.placeholder(shape=(None, x.shape[1], x.shape[2]), dtype=tf.float32, name="enc_input") y0_input = tf.placeholder(shape=(None, 2), dtype=tf.float32, name="prop_input") # input is d, v y_input = tf.placeholder(shape=(None, timesteps, 2), dtype=tf.float32, name="label_input") length_input = tf.placeholder(dtype=tf.int32, shape=()) # Dynamics encoder encoder = helpers.Encoder(n_filters=[16, 16, 16, 16]) training = tf.placeholder_with_default(False, []) z = encoder(x_input, training=training) # Propagating decoders prop_d = SymbolicNetL0(2, funcs=primitive_funcs) prop_v = SymbolicNetL0(2, funcs=primitive_funcs) prop_d.build(4) prop_v.build(4) # Building recurrent structure rnn = tf.keras.layers.RNN(SymbolicCell(prop_d, prop_v), return_sequences=True) y0_rnn = tf.concat([ tf.expand_dims(y0_input, axis=1), tf.zeros((tf.shape(y0_input)[0], length_input - 1, 2)) ], axis=1) prop_input = tf.concat([ y0_rnn, tf.keras.backend.repeat(z, length_input), tf.ones((tf.shape(y0_input)[0], length_input, 1)) ], axis=2) y_hat = rnn(prop_input) length_list = [1, 2, 3, 4, 5, 7, 10, 15, 25] # Slowly increase the length of propagation # Training learning_rate_ph = tf.placeholder(tf.float32) opt = tf.train.RMSPropOptimizer(learning_rate=learning_rate_ph) reg_weight_ph = tf.placeholder(tf.float32) reg_loss = prop_d.get_loss() + prop_v.get_loss() error = tf.losses.mean_squared_error(labels=y_input[:, :length_input, :], predictions=y_hat) loss = error + reg_weight_ph * reg_loss train = tf.group([opt.minimize(loss), encoder.bn.updates]) batch = helpers.batch_generator([x, y_data, y0, z_data], N=N, batch_size=batch_size) # Training session config = tf.ConfigProto() config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: for _ in range(trials): loss_i = np.nan while np.isnan(loss_i): loss_list = [] error_list = [] reg_list = [] sess.run(tf.global_variables_initializer()) length_i = 1 for i in range(n_epochs1 + n_epochs2): if i < n_epochs1: lr_i = learning_rate else: lr_i = learning_rate / 10 x_batch, y_batch, y0_batch, z_batch = next(batch) feed_dict = { x_input: x_batch, y0_input: y0_batch, y_input: y_batch, learning_rate_ph: lr_i, training: True, reg_weight_ph: reg_weight, length_input: length_i } _ = sess.run(train, feed_dict=feed_dict) if i % summary_step == 0: feed_dict[training] = False loss_i, error_i, reg_i, z_arr = sess.run( (loss, error, reg_loss, z), feed_dict=feed_dict) r = np.corrcoef(z_batch[:, 0], z_arr[:, 0])[1, 0] loss_list.append(loss_i) error_list.append(error_i) reg_list.append(reg_i) print("Epoch %d\tTotal loss: %f\tError: %f\tReg loss: %f\tCorrelation: %f" % (i, loss_i, error_i, reg_i, r)) if np.isnan(loss_i): break i_length = min(i // 1000, len(length_list) - 1) length_i = length_list[i_length] weights_d = sess.run(prop_d.get_weights()) expr_d = pretty_print.network(weights_d, primitive_funcs, ["d", "v", "z", 1]) print(expr_d) weights_v = sess.run(prop_v.get_weights()) expr_v = pretty_print.network(weights_v, primitive_funcs, ["d", "v", "z", 1]) print(expr_v) print("Done. Saving results.") # z_arr = sess.run(z, feed_dict=feed_dict) # Save results results = { "summary_step": summary_step, "learning_rate": learning_rate, "n_epochs1": n_epochs1, "reg_weight": reg_weight, "timesteps": timesteps, "weights_d": weights_d, "weights_v": weights_v, "loss_plot": loss_list, "error_plot": error_list, "reg_plot": reg_list, "expr_d": expr_d, "expr_v": expr_v } trial_dir = helpers.get_trial_path( results_dir) # Get directory in which to save trial results tf.saved_model.simple_save(sess, trial_dir, inputs={ "x": x_input, "y0": y0_input, "training": training }, outputs={ "z": z, "y": y_hat }) # Save a summary of the parameters and results with open(os.path.join(trial_dir, 'summary.pickle'), "wb+") as f: pickle.dump(results, f) with open(os.path.join(results_dir, 'eq_summary.txt'), 'a') as f: f.write(str(expr_d) + "\n") f.write(str(expr_v) + "\n") f.write("Error: %f\n\n" % error_list[-1])
def _make_input_ops(self): self.x_in = tf.placeholder_with_default(self.dataset.image_op, shape=[None, self.x_dims[0], self.x_dims[1], self.x_dims[2]], name='x_in') self.y_in = tf.placeholder_with_default(self.dataset.label_op, shape=[None, self.n_classes], name='y_in')
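# Standalone sketch of the pattern behind _make_input_ops (illustrative sizes and names):
# when nothing is fed, x_in pulls the next training batch from the dataset op; at
# inference time the very same tensor accepts an explicit array through feed_dict.
import numpy as np
import tensorflow as tf

images = np.random.rand(100, 8, 8, 3).astype(np.float32)
image_op = tf.data.Dataset.from_tensor_slices(images).batch(10).repeat() \
    .make_one_shot_iterator().get_next()
x_in_demo = tf.placeholder_with_default(image_op, shape=[None, 8, 8, 3], name='x_in_demo')

with tf.Session() as sess:
    print(sess.run(x_in_demo).shape)   # (10, 8, 8, 3) -- next batch from the dataset op
    print(sess.run(x_in_demo, {x_in_demo: np.zeros((2, 8, 8, 3), np.float32)}).shape)  # (2, 8, 8, 3) -- fed directly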
def _testDecoderGeneric(self, decoder, with_beam_search=False, with_alignment_history=False, support_alignment_history=True): batch_size = 4 beam_width = 5 num_hyps = beam_width if with_beam_search else 1 vocab_size = 10 depth = 6 end_token = 2 start_tokens = tf.placeholder_with_default([1] * batch_size, shape=[None]) memory_sequence_length = [3, 7, 5, 4] memory_time = max(memory_sequence_length) memory = tf.placeholder_with_default( np.random.randn(batch_size, memory_time, depth).astype(np.float32), shape=(None, None, depth)) memory_sequence_length = tf.placeholder_with_default(memory_sequence_length, shape=[None]) embedding = tf.placeholder_with_default( np.random.randn(vocab_size, depth).astype(np.float32), shape=(vocab_size, depth)) if with_beam_search: decode_fn = decoder.dynamic_decode_and_search else: decode_fn = decoder.dynamic_decode additional_kwargs = {} if with_alignment_history: additional_kwargs["return_alignment_history"] = True if with_beam_search: additional_kwargs["beam_width"] = beam_width if (with_beam_search and with_alignment_history and "RNN" in decoder.__class__.__name__ and not "reorder_tensor_arrays" in fn_args(tf.contrib.seq2seq.BeamSearchDecoder.__init__)): support_alignment_history = False outputs = decode_fn( embedding, start_tokens, end_token, vocab_size=vocab_size, maximum_iterations=10, memory=memory, memory_sequence_length=memory_sequence_length, **additional_kwargs) ids = outputs[0] state = outputs[1] lengths = outputs[2] log_probs = outputs[3] decode_time = tf.shape(ids)[-1] with self.test_session() as sess: sess.run(tf.global_variables_initializer()) if not with_alignment_history: self.assertEqual(4, len(outputs)) else: self.assertEqual(5, len(outputs)) alignment_history = outputs[4] if support_alignment_history: self.assertIsInstance(alignment_history, tf.Tensor) with self.test_session() as sess: alignment_history, decode_time = sess.run([alignment_history, decode_time]) self.assertAllEqual( [batch_size, num_hyps, decode_time, memory_time], alignment_history.shape) else: self.assertIsNone(alignment_history) with self.test_session() as sess: ids, lengths, log_probs = sess.run([ids, lengths, log_probs]) self.assertAllEqual([batch_size, num_hyps], ids.shape[0:2]) self.assertAllEqual([batch_size, num_hyps], lengths.shape) self.assertAllEqual([batch_size, num_hyps], log_probs.shape)
def build(self, target_output, train_input, test_input, hparams, name='ll'): """Build the Label Learner network.""" self.name = name self._hparams = hparams with tf.variable_scope(self.name): # Setup placeholders # ------------------------------------ self._batch_type = tf.placeholder_with_default(input='training', shape=[], name='batch_type') # 0) nn params # ------------------------------------ non_linearity = self._hparams.non_linearity hidden_size = self._hparams.hidden_size # 1) organise inputs to network # ------------------------------------ # Switch inputs based on the batch type x_nn = tf.cond(tf.equal(self._batch_type, 'training'), lambda: train_input, lambda: test_input) self._dual.set_op('target_output', target_output) t_nn_shape = target_output.get_shape().as_list() t_nn_size = np.prod(t_nn_shape[1:]) x_nn = tf.layers.flatten(x_nn) # 2) build the network # ------------------------------------ # apply noise at train and/or test time, to regularise / test generalisation x_nn = tf.cond( tf.equal(self._batch_type, 'encoding'), lambda: image_utils.add_image_salt_noise_flat( x_nn, None, noise_val=self._hparams.test_with_noise, noise_factor=self._hparams.test_with_noise_pp), lambda: x_nn) x_nn = tf.cond( tf.equal(self._batch_type, 'training'), lambda: image_utils.add_image_salt_noise_flat( x_nn, None, noise_val=self._hparams.train_with_noise, noise_factor=self._hparams.train_with_noise_pp), lambda: x_nn) # apply dropout during training input_keep_prob = self._hparams.train_input_dropout_keep_prob x_nn = tf.cond(tf.equal(self._batch_type, 'training'), lambda: tf.nn.dropout(x_nn, input_keep_prob), lambda: x_nn) self._dual.set_op('x_nn', x_nn) x_nn = tf.layers.flatten(x_nn) # Hidden layer[s] weights = [] # Build hidden layer(s) if hidden_size > 0: layer_hidden = tf.layers.Dense( units=hidden_size, activation=type_activation_fn(non_linearity), name='hidden', kernel_initializer=build_kernel_initializer('xavier')) hidden_out = layer_hidden(x_nn) weights.append(layer_hidden.weights[0]) weights.append(layer_hidden.weights[1]) hidden_keep_prob = self._hparams.train_hidden_dropout_keep_prob hidden_out = tf.cond( tf.equal(self._batch_type, 'training'), lambda: tf.nn.dropout(hidden_out, hidden_keep_prob), lambda: hidden_out) else: hidden_out = x_nn # Build output layer layer_out = tf.layers.Dense( units=t_nn_size, name='logits', kernel_initializer=build_kernel_initializer('xavier')) logits = layer_out(hidden_out) weights.append(layer_out.weights[0]) weights.append(layer_out.weights[1]) self._weights = weights f = tf.nn.softmax(logits) # Unit range y = tf.stop_gradient(f) self._dual.set_op('preds', y) self._dual.set_op('logits', logits) # Compute accuracy preds = self._dual.get_op('preds') labels = self._dual.get_op('target_output') unseen_sum = 1 unseen_idxs = (0, unseen_sum) # if name == 'll_vc': # labels = tf.Print(labels, [tf.argmax(labels, 1)], 'labels=', summarize=20) # preds = tf.Print(preds, [tf.argmax(preds, 1)], 'preds=', summarize=20) correct_predictions = tf.equal(tf.argmax(preds, 1), tf.argmax(labels, 1)) correct_predictions = tf.cast(correct_predictions, tf.float32) self._dual.set_op('correct_predictions', correct_predictions) self._dual.set_op('accuracy', tf.reduce_mean(correct_predictions)) self._dual.set_op( 'accuracy_unseen', tf.reduce_mean( correct_predictions[unseen_idxs[0]:unseen_idxs[1]])) self._dual.set_op('total_correct_predictions', tf.reduce_sum(correct_predictions)) # Build loss function loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=target_output, logits=logits) loss = tf.reduce_mean(loss) self._dual.set_op('loss', loss) if self._hparams.l2_regularizer > 0.0: all_losses = [loss] for i, weight in enumerate(weights): weight_loss = tf.nn.l2_loss(weight) weight_loss_sum = tf.reduce_sum(weight_loss) weight_loss_scaled = weight_loss_sum * self._hparams.l2_regularizer all_losses.append(weight_loss_scaled) all_losses_op = tf.add_n(all_losses) self._build_optimizer(all_losses_op, 'training_ll', scope=name) else: self._build_optimizer(loss, 'training_ll', scope=name) return y
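# Standalone sketch of the batch_type switch used above: a string-valued
# placeholder_with_default selects tf.cond branches, so one graph serves both the
# 'training' (default) and 'encoding' modes depending on what, if anything, is fed.
import tensorflow as tf

batch_type_demo = tf.placeholder_with_default('training', shape=[], name='batch_type_demo')
x = tf.ones([2, 3])
out = tf.cond(tf.equal(batch_type_demo, 'training'),
              lambda: tf.nn.dropout(x, 0.5),  # training branch: apply dropout
              lambda: x)                      # evaluation branch: pass through
with tf.Session() as sess:
    print(sess.run(out))                                  # dropout applied (default mode)
    print(sess.run(out, {batch_type_demo: 'encoding'}))   # identity branch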
return doc_emb tf.reset_default_graph() with tf.name_scope('inputs'): ##X = tf.placeholder(shape=(None, sen_len), dtype=tf.int64, name='inputs') X_emb = tf.placeholder(shape=(None, doc_len, 3072), dtype=tf.float32, name='inputs') y = tf.placeholder(shape=(None, ), dtype=tf.int64, name='labels') is_training = tf.placeholder_with_default(False, shape=[], name='is_training') seq_length = tf.placeholder(shape=(None, ), dtype=tf.int64, name='seq_length') ## prepare embedding #with tf.device('/cpu:0'): ''' with tf.name_scope('embedding'): # no pretrained_emb if pretrained_emb is False: embedding = tf.Variable(tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0), trainable = True) # load pretrained_emb ## see the backup: how to deal with too large emb
#--------------------------------------------------------CNN----------------------------------------------------- #---------------------------------------------------------------------------------------------------------------- # Training Parameters learning_rate = 0.001 num_steps = 5000 batch_size = 64 display_step = 10 # Network Parameters num_outputs = 1 # angle # tf Graph input X = tf.placeholder(tf.float32, [None, 224, 224, 3], name="X") Y = tf.placeholder(tf.float32, [None, num_outputs], name="Y") dropout_prob = tf.placeholder_with_default(1.0, shape=()) # Create model def conv_net(x): # --------------Convolution Layers-------------- conv0 = tf.layers.conv2d(inputs=x, filters = 16, kernel_size = 3, strides = (1, 1), activation=tf.nn.relu, padding='same') conv1 = tf.layers.conv2d(inputs=conv0, filters = 32, kernel_size = 3, strides = (1, 1), activation=tf.nn.relu, padding='same') lrn1 = tf.nn.local_response_normalization(conv1, depth_radius=2, alpha=1e-05, beta=0.75, bias=1.0) pool1 = tf.layers.max_pooling2d(inputs=lrn1, pool_size=[2, 2], strides=2) conv2 = tf.layers.conv2d(inputs=pool1, filters = 32, kernel_size = 3, strides = (1, 1), activation=tf.nn.relu, padding='same') lrn2 = tf.nn.local_response_normalization(conv2, depth_radius=2, alpha=1e-05, beta=0.75, bias=1.0) pool2 = tf.layers.max_pooling2d(inputs=lrn2, pool_size=[2, 2], strides=2) conv3 = tf.layers.conv2d(inputs=pool2, filters = 32, kernel_size = 3, strides = (1, 1), activation=tf.nn.relu, padding='same')
else: # First 5 is a hard-coded symmetric frame padding, ignored but time added! print("Warming up %d" % (5 - i)) print("total time " + str(srtime) + ", frame number " + str(max_iter)) # The training mode elif FLAGS.mode == 'train': # hard coded save filelist = [ 'main.py', 'lib/Teco.py', 'lib/frvsr.py', 'lib/dataloader.py', 'lib/ops.py' ] for filename in filelist: shutil.copyfile('./' + filename, FLAGS.summary_dir + filename.replace("/", "_")) useValidat = tf.placeholder_with_default(tf.constant(False, dtype=tf.bool), shape=()) rdata = frvsr_gpu_data_loader(FLAGS, useValidat) # Data = collections.namedtuple('Data', 'paths_HR, s_inputs, s_targets, image_count, steps_per_epoch') print('tData count = %d, steps per epoch %d' % (rdata.image_count, rdata.steps_per_epoch)) if (FLAGS.ratio > 0): Net = TecoGAN(rdata.s_inputs, rdata.s_targets, FLAGS) else: Net = FRVSR(rdata.s_inputs, rdata.s_targets, FLAGS) # Network = collections.namedtuple('Network', 'gen_output, train, learning_rate, update_list, ' # 'update_list_name, update_list_avg, image_summary') # Add scalar summary tf.summary.scalar('learning_rate', Net.learning_rate) train_summary = [] for key, value in zip(Net.update_list_name, Net.update_list_avg):
def _testDecoderInference(self, decoder, initial_state_fn=None, num_sources=1, with_beam_search=False, with_alignment_history=False, dtype=tf.float32, checkpoint_path=None): batch_size = 4 beam_width = 5 num_hyps = beam_width if with_beam_search else 1 vocab_size = 10 depth = 6 end_token = 2 start_tokens = tf.placeholder_with_default([1] * batch_size, shape=[None]) embedding = tf.placeholder_with_default(np.random.randn( vocab_size, depth).astype(dtype.as_numpy_dtype()), shape=(vocab_size, depth)) initial_state, memory, memory_sequence_length = _generate_source_context( batch_size, depth, initial_state_fn=initial_state_fn, num_sources=num_sources, dtype=dtype) if with_beam_search: decode_fn = decoder.dynamic_decode_and_search else: decode_fn = decoder.dynamic_decode additional_kwargs = {} if with_alignment_history: additional_kwargs["return_alignment_history"] = True if with_beam_search: additional_kwargs["beam_width"] = beam_width outputs = decode_fn(embedding, start_tokens, end_token, vocab_size=vocab_size, initial_state=initial_state, maximum_iterations=10, memory=memory, memory_sequence_length=memory_sequence_length, **additional_kwargs) ids = outputs[0] state = outputs[1] lengths = outputs[2] log_probs = outputs[3] self.assertEqual(log_probs.dtype, tf.float32) saver = tf.train.Saver(var_list=tf.global_variables()) with self.test_session(graph=tf.get_default_graph()) as sess: if checkpoint_path is not None: saver.restore(sess, checkpoint_path) else: sess.run(tf.global_variables_initializer()) if not with_alignment_history: self.assertEqual(4, len(outputs)) else: self.assertEqual(5, len(outputs)) alignment_history = outputs[4] if decoder.support_alignment_history and num_sources == 1: self.assertIsInstance(alignment_history, tf.Tensor) alignment_history, decode_time, memory_time = sess.run([ alignment_history, tf.shape(ids)[-1], tf.shape(memory)[1] ]) self.assertAllEqual( [batch_size, num_hyps, decode_time - 1, memory_time], alignment_history.shape) else: self.assertIsNone(alignment_history) ids, lengths, log_probs = sess.run([ids, lengths, log_probs]) self.assertAllEqual([batch_size, num_hyps], ids.shape[0:2]) self.assertAllEqual([batch_size, num_hyps], lengths.shape) self.assertAllEqual([batch_size, num_hyps], log_probs.shape)
def __init__(self, **kwargs): self.gan_defaults.update(**kwargs) super(GAN, self).__init__(**self.gan_defaults) assert self.sess is not None assert len(self.x_dims) == 3 if self.mode == 'TRAIN': assert self.dataset is not None self.generator = Generator(gen_kernels=self.gen_kernels, x_dims=self.x_dims) self.discriminator = Discriminator(dis_kernels=self.dis_kernels, soften_labels=self.soften_labels, soften_sddev=self.soften_sddev) ## ---------------------- Input ops ----------------------- ## if self.iterator_dataset: self.x_in = tf.placeholder( tf.float32, shape=[None, self.x_dims[0], self.x_dims[1], self.x_dims[2]], name='x_in') else: self.x_in = tf.placeholder_with_default( self.dataset.image_op, shape=[None, self.x_dims[0], self.x_dims[1], self.x_dims[2]], name='x_in') self.zed_default = tf.random_normal([self.batch_size, self.z_dim], mean=0.0, stddev=1.0) self.zed = tf.placeholder_with_default(self.zed_default, shape=[None, self.z_dim], name='zed') self.keep_prob = tf.placeholder_with_default(0.5, shape=[], name='keep_prob') ## ---------------------- Model ops ----------------------- ## self.x_hat = self.generator.model(self.zed, keep_prob=self.keep_prob) self.p_real_real = self.discriminator.model(self.x_in, keep_prob=self.keep_prob) self.p_real_fake = self.discriminator.model(self.x_hat, keep_prob=self.keep_prob, reuse=True) ## ---------------------- Loss ops ------------------------ ## self._loss_op() ## -------------------- Training ops ---------------------- ## self._training_ops() ## --------------------- Summary ops ---------------------- ## self._summary_ops() ## ------------------- TensorFlow ops --------------------- ## self._tf_ops() ## ---------------------- Initialize ---------------------- ## self._print_info_to_file(filename=os.path.join( self.save_dir, '{}_settings.txt'.format(self.name))) self.sess.run(tf.global_variables_initializer()) ## ---------------------- Pretraining --------------------- ## if self.pretraining is not None: self._pretraining()
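# Standalone sketch of the `zed` input above (illustrative sizes): with no feed, every
# run draws fresh Gaussian noise from the default op; feeding `zed` pins the latent
# code, which is handy for interpolations or for reproducing a specific sample.
import numpy as np
import tensorflow as tf

z_dim, batch_size = 4, 2
zed_default = tf.random_normal([batch_size, z_dim], mean=0.0, stddev=1.0)
zed_demo = tf.placeholder_with_default(zed_default, shape=[None, z_dim], name='zed_demo')
with tf.Session() as sess:
    print(sess.run(zed_demo))                                                # fresh noise each call
    print(sess.run(zed_demo, {zed_demo: np.zeros((1, z_dim), np.float32)}))  # fixed latent code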
else: raise ValueError('Invalid argument for model: ' + str(FLAGS.model)) features_feed = features_feed + list(features_par) # Define placeholders placeholders = { 'support': [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)], 'all_phrase': tf.placeholder(tf.float32, shape=(None, all_phrase.shape[1])), 'features': tf.sparse_placeholder(tf.float32, shape=(None, features_par[2][1])), 'labels': tf.placeholder(tf.float32, shape=(None, y_train.shape[1])), 'labels_mask': tf.placeholder(tf.int32), 'dropout': tf.placeholder_with_default(0., shape=()), 'num_features_nonzero': tf.placeholder(tf.int32) # helper variable for sparse dropout } with tf.device('/gpu:0'): # Create model model = model_func(placeholders, input_dim=features_par[2][1], logging=False) # Define model evaluation function def evaluate(features, support, labels, mask, all_phrase, placeholders): t_test = time.time() feed_dict_val = construct_feed_dict(features, support, labels, mask,
import tensorflow as tf input_x = tf.placeholder(tf.float32, name='input_x') input_x1 = tf.placeholder_with_default(5.0, shape=None, name='input_x1') w = tf.Variable(1.0, name='w') y = tf.add(input_x, w) y1 = tf.add(input_x1, w) with tf.Session() as sess: sess.run(tf.global_variables_initializer()) for i in range(5): print(sess.run(y, {input_x: i})) # print(sess.run(y,feed_dict={input_x:i})) print('==') print(sess.run(y1))
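# Follow-up to the snippet above: the default of input_x1 (5.0) can still be
# overridden through feed_dict, so the same graph serves both the default and the
# explicitly-fed case.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(y1))                      # 6.0 -- default 5.0 + w
    print(sess.run(y1, {input_x1: 10.0}))    # 11.0 -- fed value + w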
def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor): """Adds a new softmax and fully-connected layer for training. We need to retrain the top layer to identify our new classes, so this function adds the right operations to the graph, along with some variables to hold the weights, and then sets up all the gradients for the backward pass. The set up for the softmax and fully-connected layers is based on: https://tensorflow.org/versions/master/tutorials/mnist/beginners/index.html Args: class_count: Integer of how many categories of things we're trying to recognize. final_tensor_name: Name string for the new final node that produces results. bottleneck_tensor: The output of the main CNN graph. Returns: The tensors for the training and cross entropy results, and tensors for the bottleneck input and ground truth input. """ with tf.name_scope('input'): bottleneck_input = tf.placeholder_with_default( bottleneck_tensor, shape=[None, BOTTLENECK_TENSOR_SIZE], name='BottleneckInputPlaceholder') ground_truth_input = tf.placeholder(tf.float32, [None, class_count], name='GroundTruthInput') # Organizing the following ops as `final_training_ops` so they're easier # to see in TensorBoard layer_name = 'final_training_ops' with tf.name_scope(layer_name): with tf.name_scope('weights'): layer_weights = tf.Variable(tf.truncated_normal( [BOTTLENECK_TENSOR_SIZE, class_count], stddev=0.001), name='final_weights') variable_summaries(layer_weights, layer_name + '/weights') with tf.name_scope('biases'): layer_biases = tf.Variable(tf.zeros([class_count]), name='final_biases') variable_summaries(layer_biases, layer_name + '/biases') with tf.name_scope('Wx_plus_b'): logits = tf.matmul(bottleneck_input, layer_weights) + layer_biases tf.summary.histogram(layer_name + '/pre_activations', logits) final_tensor = tf.nn.softmax(logits, name=final_tensor_name) tf.summary.histogram(final_tensor_name + '/activations', final_tensor) with tf.name_scope('cross_entropy'): cross_entropy = tf.nn.softmax_cross_entropy_with_logits( labels=ground_truth_input, logits=logits) with tf.name_scope('total'): cross_entropy_mean = tf.reduce_mean(cross_entropy) tf.summary.scalar('cross_entropy', cross_entropy_mean) with tf.name_scope('train'): train_step = tf.train.GradientDescentOptimizer( FLAGS.learning_rate).minimize(cross_entropy_mean) return (train_step, cross_entropy_mean, bottleneck_input, ground_truth_input, final_tensor)
def build_shapes(self, shape_in, shape_out): shape_in = np.array(shape_in, np.int32) shape_out = np.array(shape_out, np.int32) return (tf.placeholder_with_default(shape_in, shape=None), tf.placeholder_with_default(shape_out, shape=None))
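# Usage sketch of the pattern in build_shapes above (standalone; the method itself lives
# on a class): a shape routed through placeholder_with_default keeps a default value but
# loses its static shape, so it stays overridable per run, e.g. when driving tf.reshape.
import numpy as np
import tensorflow as tf

shape_out = tf.placeholder_with_default(np.array([4, 4], np.int32), shape=None)
y = tf.reshape(tf.range(16), shape_out)
with tf.Session() as sess:
    print(sess.run(y).shape)                          # (4, 4) -- default shape
    print(sess.run(y, {shape_out: [2, 8]}).shape)     # (2, 8) -- shape overridden at run time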