def testFeederActsLikeQueue(self):
    """Checks that tensors fed into a Feeder come back out in queue order.

    Three string pairs are fed; they are then read back both together and
    singly, and finally past the end of the fed data.
    """
    feeder = feeder_lib.Feeder(
        dtypes=[dtypes_lib.string, dtypes_lib.string],
        shapes=[[], []],
        capacity=10)

    feeder.set_many_fed_tensors([
        constant_op.constant(['a0', 'a1', 'a2']),
        constant_op.constant(['b0', 'b1', 'b2'])
    ])

    out_a, out_b = feeder.get_fed_tensors()

    with self.test_session() as session:
        coord = coordinator.Coordinator()
        queue_runner_impl.start_queue_runners(session, coord=coord)

        # `assertEqual` is used instead of the deprecated `assertEquals`
        # alias, which no longer exists on Python 3.12+.
        a, b = session.run([out_a, out_b])
        self.assertEqual(b'a0', a)
        self.assertEqual(b'b0', b)

        a = session.run(out_a)  # Omit b!
        self.assertEqual(b'a1', a)

        a, b = session.run([out_a, out_b])
        self.assertEqual(b'a2', a)
        self.assertEqual(b'b2', b)  # queued together

        a, b = session.run([out_a, out_b])  # loops around
        self.assertEqual(b'a0', a)
        self.assertEqual(b'b0', b)  # queued together

        coord.request_stop()
        coord.join()
def test_long_eval_discard_indivisible(self):
    """Evaluates an ARModel on a series that does not divide into output windows.

    Asserts the prediction times and the per-feature prediction shape of an
    EVAL-mode loss computed on five raw observations.
    """
    graph = ops.Graph()
    with graph.as_default():
        ar_model = ARModel(
            periodicities=2,
            num_features=1,
            num_time_buckets=10,
            input_window_size=2,
            output_window_size=2)
        raw_features = {
            TrainEvalFeatures.TIMES: [[1, 3, 5, 7, 11]],
            TrainEvalFeatures.VALUES: [[[1.], [2.], [3.], [4.], [5.]]],
        }
        ar_model.initialize_graph()
        raw_evaluation = ar_model.define_loss(
            raw_features, mode=estimator_lib.ModeKeys.EVAL)
        with session.Session() as sess:
            coord = coordinator_lib.Coordinator()
            queue_runner_impl.start_queue_runners(sess, coord=coord)
            variables.global_variables_initializer().run()
            evaled = sess.run(raw_evaluation)
            self.assertAllEqual([[7, 11]], evaled.prediction_times)
            # batch, window, num_features. The window has two cut off for
            # the first input window and one discarded so that the remainder
            # is divisible into output windows.
            expected_shape = [1, 2, 1]
            for feature_name in raw_evaluation.predictions:
                actual_shape = evaled.predictions[feature_name].shape
                self.assertAllEqual(expected_shape, actual_shape)
            coord.request_stop()
            coord.join()
def _test_pass_to_next(self, read_offset, step, correct_offset):
    """Evaluates two chained losses on offset slices of one series.

    The first loss is built on the data with the last `read_offset` entries
    removed, the second on the data with the first `read_offset` entries
    removed. Both are evaluated in that order through the same
    ChainingStateManager.

    Args:
      read_offset: Number of entries by which the second slice is shifted
        relative to the first.
      step: Forwarded to `_make_test_data`.
      correct_offset: Forwarded to `StubTimeSeriesModel`.

    Returns:
      The evaluated loss of the second (shifted) slice.
    """
    stub_model = StubTimeSeriesModel(correct_offset=correct_offset)
    data = self._make_test_data(
        length=100 + read_offset, cut_start=None, cut_end=None, offset=100.,
        step=step)
    # `-read_offset or None` keeps the whole array when read_offset is 0;
    # a plain `v[:-0]` would silently yield an empty slice instead.
    init_input_fn = input_pipeline.WholeDatasetInputFn(
        input_pipeline.NumpyReader(
            {k: v[:-read_offset or None] for k, v in data.items()}))
    result_input_fn = input_pipeline.WholeDatasetInputFn(
        input_pipeline.NumpyReader(
            {k: v[read_offset:] for k, v in data.items()}))

    chainer = state_management.ChainingStateManager(
        state_saving_interval=1)
    stub_model.initialize_graph()
    chainer.initialize_graph(model=stub_model)
    init_model_outputs = chainer.define_loss(
        model=stub_model, features=init_input_fn()[0],
        mode=estimator_lib.ModeKeys.TRAIN)
    result_model_outputs = chainer.define_loss(
        model=stub_model, features=result_input_fn()[0],
        mode=estimator_lib.ModeKeys.TRAIN)
    with self.test_session() as session:
        variables.global_variables_initializer().run()
        coordinator = coordinator_lib.Coordinator()
        queue_runner_impl.start_queue_runners(session, coord=coordinator)
        # The first evaluation runs before the second one, whose loss is
        # the value handed back to the caller.
        init_model_outputs.loss.eval()
        returned_loss = result_model_outputs.loss.eval()
        coordinator.request_stop()
        coordinator.join()
        return returned_loss
def _random_window_input_fn_test_template(
        self, time_series_reader, window_size, batch_size, num_features,
        discard_out_of_order=False):
    """Draws one batch from RandomWindowInputFn and validates its contents.

    Checks the shapes of times and values, that times within each window
    increase by exactly one, that times compare equal to "int64", and that
    each value column equals `times * 2. + feature_number`.

    Note: `discard_out_of_order` is accepted but not used in this body.

    Returns:
      The evaluated feature dictionary.
    """
    random_window_fn = input_pipeline.RandomWindowInputFn(
        time_series_reader=time_series_reader,
        window_size=window_size,
        batch_size=batch_size)
    result, _ = random_window_fn()
    init_op = variables.local_variables_initializer()
    with self.cached_session() as session:
        coord = coordinator_lib.Coordinator()
        queue_runner_impl.start_queue_runners(session, coord=coord)
        session.run(init_op)
        features = session.run(result)
        coord.request_stop()
        coord.join()

    times_key = TrainEvalFeatures.TIMES
    values_key = TrainEvalFeatures.VALUES
    self.assertAllEqual([batch_size, window_size],
                        features[times_key].shape)
    # Checks that all times are contiguous
    for window_position in range(window_size - 1):
        for batch_position in range(batch_size):
            following = features[times_key][batch_position,
                                            window_position + 1]
            current = features[times_key][batch_position, window_position]
            self.assertEqual(following, current + 1)
    self.assertAllEqual([batch_size, window_size, num_features],
                        features[values_key].shape)
    self.assertEqual("int64", features[times_key].dtype)
    for feature_number in range(num_features):
        expected_column = features[times_key] * 2. + feature_number
        self.assertAllEqual(expected_column,
                            features[values_key][:, :, feature_number])
    return features
def _test_initialization(self, warmup_iterations, batch_size):
    """Returns a chained training loss after optional warm-up evaluations.

    Args:
      warmup_iterations: Number of times the loss is evaluated before the
        returned evaluation.
      batch_size: Batch size for RandomWindowInputFn, or -1 to use
        AllWindowInputFn instead.

    Returns:
      The loss value from the evaluation following the warm-up.
    """
    stub_model = StubTimeSeriesModel()
    data = self._make_test_data(
        length=20, cut_start=None, cut_end=None, offset=0.)
    use_all_windows = batch_size == -1
    if use_all_windows:
        input_fn = test_utils.AllWindowInputFn(
            input_pipeline.NumpyReader(data), window_size=10)
    else:
        input_fn = input_pipeline.RandomWindowInputFn(
            input_pipeline.NumpyReader(data),
            window_size=10,
            batch_size=batch_size)
    chainer = state_management.ChainingStateManager(
        state_saving_interval=1)
    features, _ = input_fn()
    stub_model.initialize_graph()
    chainer.initialize_graph(model=stub_model)
    model_outputs = chainer.define_loss(
        model=stub_model,
        features=features,
        mode=estimator_lib.ModeKeys.TRAIN)
    with self.test_session() as session:
        variables.global_variables_initializer().run()
        coord = coordinator_lib.Coordinator()
        queue_runner_impl.start_queue_runners(session, coord=coord)
        # Warm up saved state
        remaining = warmup_iterations
        while remaining > 0:
            model_outputs.loss.eval()
            remaining -= 1
        outputs = model_outputs.loss.eval()
        coord.request_stop()
        coord.join()
        return outputs
def _gap_test_template(self, times, values):
    """Builds a random state space model loss on the given data and runs it.

    The test only checks that evaluating the loss completes; no value is
    asserted.

    Args:
      times: Times handed to NumpyReader under TrainEvalFeatures.TIMES.
      values: Values handed to NumpyReader under TrainEvalFeatures.VALUES.
    """
    times_key = feature_keys.TrainEvalFeatures.TIMES
    values_key = feature_keys.TrainEvalFeatures.VALUES
    random_model = RandomStateSpaceModel(
        state_dimension=1,
        state_noise_dimension=1,
        configuration=state_space_model.StateSpaceModelConfiguration(
            num_features=1))
    random_model.initialize_graph()
    reader = input_pipeline.NumpyReader({times_key: times,
                                         values_key: values})
    input_fn = input_pipeline.WholeDatasetInputFn(reader)
    features, _ = input_fn()
    # From here on, work with what the input pipeline produced rather than
    # the raw arguments.
    times = features[times_key]
    values = features[values_key]
    model_outputs = random_model.get_batch_loss(
        features={times_key: times, values_key: values},
        mode=None,
        state=math_utils.replicate_state(
            start_state=random_model.get_start_state(),
            batch_size=array_ops.shape(times)[0]))
    with self.cached_session() as session:
        variables.global_variables_initializer().run()
        coord = coordinator_lib.Coordinator()
        queue_runner_impl.start_queue_runners(session, coord=coord)
        model_outputs.loss.eval()
        coord.request_stop()
        coord.join()
def _all_window_input_fn_test_template(
        self, time_series_reader, num_samples, window_size,
        original_numpy_features=None):
    """Runs AllWindowInputFn once and checks the chunked output.

    Asserts that the chunked times have shape
    `[num_samples - window_size + 1, window_size]`. When the original numpy
    features are supplied, also asserts that the unique chunked times equal
    the original times and that the chunked values match the original
    values indexed by the chunked times.
    """
    all_window_fn = test_utils.AllWindowInputFn(
        time_series_reader=time_series_reader, window_size=window_size)
    features, _ = all_window_fn()
    init_op = variables.local_variables_initializer()
    with self.cached_session() as session:
        coord = coordinator_lib.Coordinator()
        queue_runner_impl.start_queue_runners(session, coord=coord)
        session.run(init_op)
        fetches = [features[TrainEvalFeatures.TIMES],
                   features[TrainEvalFeatures.VALUES]]
        chunked_times, chunked_values = session.run(fetches)
        coord.request_stop()
        coord.join()

    expected_window_count = num_samples - window_size + 1
    self.assertAllEqual([expected_window_count, window_size],
                        chunked_times.shape)
    if original_numpy_features is None:
        return
    original_times = original_numpy_features[TrainEvalFeatures.TIMES]
    original_values = original_numpy_features[TrainEvalFeatures.VALUES]
    self.assertAllEqual(original_times, numpy.unique(chunked_times))
    self.assertAllEqual(original_values[chunked_times], chunked_values)
def _equivalent_to_single_model_test_template(self, model_generator):
    """Compares a random state space model against a generated comparison.

    Args:
      model_generator: Called with `(random_model, model_data)`. It must
        return a function that takes the session and returns a 3-tuple
        whose second and third members are posteriors and predictions.
    """
    with self.cached_session() as session:
        configuration = state_space_model.StateSpaceModelConfiguration(
            dtype=dtypes.float64, num_features=1)
        random_model = RandomStateSpaceModel(
            state_dimension=5,
            state_noise_dimension=4,
            configuration=configuration)
        random_model.initialize_graph()
        series_length = 10
        model_parameters = random_model.random_model_parameters()
        model_data = random_model.generate(
            number_of_series=1,
            series_length=series_length,
            model_parameters=model_parameters)
        input_fn = input_pipeline.WholeDatasetInputFn(
            input_pipeline.NumpyReader(model_data))
        features, _ = input_fn()
        start_state = random_model.get_start_state()
        batch_size = array_ops.shape(
            features[feature_keys.TrainEvalFeatures.TIMES])[0]
        model_outputs = random_model.get_batch_loss(
            features=features,
            mode=None,
            state=math_utils.replicate_state(
                start_state=start_state, batch_size=batch_size))
        variables.global_variables_initializer().run()
        compare_outputs_evaled_fn = model_generator(random_model, model_data)
        coord = coordinator_lib.Coordinator()
        queue_runner_impl.start_queue_runners(session, coord=coord)
        compare_outputs_evaled = compare_outputs_evaled_fn(session)
        model_outputs_evaled = session.run(
            (model_outputs.end_state, model_outputs.predictions))
        coord.request_stop()
        coord.join()

        model_posteriors, model_predictions = model_outputs_evaled
        _, compare_posteriors, compare_predictions = compare_outputs_evaled
        model_mean, model_var, model_from_time = model_posteriors
        compare_mean, compare_var, compare_from_time = compare_posteriors
        self.assertAllClose(model_mean, compare_mean[0])
        self.assertAllClose(model_var, compare_var[0])
        self.assertAllClose(model_from_time, compare_from_time)
        self.assertEqual(sorted(model_predictions.keys()),
                         sorted(compare_predictions.keys()))
        for prediction_name in model_predictions:
            # Chunking means that losses will be different; skip testing
            # them.
            if prediction_name != "loss":
                # Compare the last chunk to the corresponding un-chunked
                # model predictions.
                last_chunk = compare_predictions[prediction_name][-1]
                comparison_values = last_chunk.shape[0]
                unchunked_tail = (
                    model_predictions[prediction_name][
                        0, -comparison_values:])
                self.assertAllClose(unchunked_tail, last_chunk)
def _input_statistics_test_template(
        self, stat_object, num_features, dtype, give_full_data,
        warmup_iterations=0, rtol=1e-6, data_length=500, chunk_size=4):
    """Checks the statistics `stat_object` computes over synthetic data.

    Args:
      stat_object: Object exposing `set_data` and `initialize_graph`.
      num_features: Number of feature columns in the synthetic values.
      dtype: Provides `as_numpy_dtype` for the synthetic values.
      give_full_data: Whether to call `stat_object.set_data` with the whole
        dataset before building the graph.
      warmup_iterations: Number of evaluations run before the assertions.
      rtol: Relative tolerance for every comparison.
      data_length: Number of synthetic time steps.
      chunk_size: Window size handed to RandomWindowInputFn.
    """
    graph = ops.Graph()
    with graph.as_default():
        numpy_dtype = dtype.as_numpy_dtype
        per_step = numpy.arange(data_length, dtype=numpy_dtype)[..., None]
        per_feature = numpy.arange(num_features, dtype=numpy_dtype)[None, ...]
        values = (per_step + per_feature)[None]
        times = 2 * (numpy.arange(data_length)[None]) - 3
        if give_full_data:
            stat_object.set_data((times, values))
        features = {TrainEvalFeatures.TIMES: times,
                    TrainEvalFeatures.VALUES: values}
        input_fn = input_pipeline.RandomWindowInputFn(
            batch_size=16,
            window_size=chunk_size,
            time_series_reader=input_pipeline.NumpyReader(features))
        statistics = stat_object.initialize_graph(features=input_fn()[0])
        with self.session(graph=graph) as session:
            variables.global_variables_initializer().run()
            coord = coordinator_lib.Coordinator()
            queue_runner_impl.start_queue_runners(session, coord=coord)
            # A control dependency should ensure that, for queue-based
            # statistics, a use of any statistic is preceded by an update
            # of all adaptive statistics.
            for _ in range(warmup_iterations):
                statistics.total_observation_count.eval()

            start_moments = statistics.series_start_moments
            overall_moments = statistics.overall_feature_moments
            self.assertAllClose(
                range(num_features)
                + numpy.mean(numpy.arange(chunk_size))[None],
                start_moments.mean.eval(),
                rtol=rtol)
            self.assertAllClose(
                numpy.tile(numpy.var(numpy.arange(chunk_size))[None],
                           [num_features]),
                start_moments.variance.eval(),
                rtol=rtol)
            self.assertAllClose(
                numpy.mean(values[0], axis=0),
                overall_moments.mean.eval(),
                rtol=rtol)
            self.assertAllClose(
                numpy.var(values[0], axis=0),
                overall_moments.variance.eval(),
                rtol=rtol)
            self.assertAllClose(-3, statistics.start_time.eval(), rtol=rtol)
            self.assertAllClose(
                data_length,
                statistics.total_observation_count.eval(),
                rtol=rtol)
            coord.request_stop()
            coord.join()
def testStartQueueRunnersRaisesIfNotASession(self):
    """Checks that start_queue_runners rejects a non-session argument.

    A queue runner is registered first; passing the string "NotASession"
    must then raise a TypeError whose message matches "tf.Session".
    """
    zero64 = constant_op.constant(0, dtype=dtypes.int64)
    var = variables.VariableV1(zero64)
    count_up_to = var.count_up_to(3)
    queue = data_flow_ops.FIFOQueue(10, dtypes.float32)
    init_op = variables.global_variables_initializer()
    qr = queue_runner_impl.QueueRunner(queue, [count_up_to])
    queue_runner_impl.add_queue_runner(qr)
    with self.cached_session():
        init_op.run()
        # `assertRaisesRegex` replaces the deprecated `assertRaisesRegexp`
        # alias, which no longer exists on Python 3.12+.
        with self.assertRaisesRegex(TypeError, "tf.Session"):
            queue_runner_impl.start_queue_runners("NotASession")
def testExtendAfterQueueRunners(self):
    """Runs a new assign op after queue runners have been started.

    The assign op is created only once the runners are running, so the
    graph is extended at that point.
    """
    target = self._cached_server.target
    with session.Session(target) as sess:
        producer_input = constant_op.constant([0.], dtype=dtypes.float32)
        input_queue = input_ops.input_producer(producer_input)
        self.assertIsNotNone(input_queue)

        var = variables.VariableV1(
            1., dtype=dtypes.float32, trainable=False, name="var")

        init_op = variables.global_variables_initializer()
        sess.run(init_op)
        queue_runner_impl.start_queue_runners(sess)
        assign_op = var.assign(3.0)
        sess.run(assign_op)
def test_long_eval(self):
    """Checks that raw and chunked ARModel evaluation agree.

    The same five observations are evaluated once as a single raw series
    and once as windows of size 3 from AllWindowInputFn. Loss, final state
    and flattened predictions must match.
    """
    graph = ops.Graph()
    with graph.as_default():
        ar_model = ARModel(
            periodicities=2,
            num_features=1,
            num_time_buckets=10,
            input_window_size=2,
            output_window_size=1)
        raw_features = {
            TrainEvalFeatures.TIMES: [[1, 3, 5, 7, 11]],
            TrainEvalFeatures.VALUES: [[[1.], [2.], [3.], [4.], [5.]]],
        }
        chunked_input_fn = test_utils.AllWindowInputFn(
            time_series_reader=input_pipeline.NumpyReader(raw_features),
            window_size=3)
        chunked_features, _ = chunked_input_fn()
        ar_model.initialize_graph()
        with variable_scope.variable_scope("armodel") as scope:
            raw_evaluation = ar_model.define_loss(
                raw_features, mode=estimator_lib.ModeKeys.EVAL)
        with variable_scope.variable_scope(scope, reuse=True):
            chunked_evaluation = ar_model.define_loss(
                chunked_features, mode=estimator_lib.ModeKeys.EVAL)
        with session.Session() as sess:
            coord = coordinator_lib.Coordinator()
            queue_runner_impl.start_queue_runners(sess, coord=coord)
            variables.global_variables_initializer().run()
            raw_evaled, chunked_evaled = sess.run(
                [raw_evaluation, chunked_evaluation])
            self.assertAllEqual(chunked_evaled.loss, raw_evaled.loss)
            # Only the state after the last chunk is comparable to the raw
            # end state.
            last_chunk_state = [
                member[-1, None] for member in chunked_evaled.end_state]
            for chunk_member, raw_member in zip(last_chunk_state,
                                                raw_evaled.end_state):
                self.assertAllEqual(chunk_member, raw_member)
            self.assertAllEqual([[5, 7, 11]], raw_evaled.prediction_times)
            # batch, window, num_features. The window size has 2 cut off
            # for the first input_window.
            expected_shape = [1, 3, 1]
            for feature_name in raw_evaluation.predictions:
                raw_prediction = raw_evaled.predictions[feature_name]
                chunked_prediction = chunked_evaled.predictions[feature_name]
                self.assertAllEqual(expected_shape, raw_prediction.shape)
                self.assertAllEqual(
                    np.reshape(chunked_prediction, [-1]),
                    np.reshape(raw_prediction, [-1]))
            coord.request_stop()
            coord.join()
def testPandasFeeding(self):
    """Checks that a DataFrame enqueued by `_enqueue_data` dequeues in order.

    Batches of five rows are dequeued 100 times. Each batch must match the
    DataFrame's index and columns at the expected positions, which wrap
    around the 32 rows.
    """
    if not HAS_PANDAS:
        # Report the test as skipped rather than silently passing.
        self.skipTest("pandas is not available")
    with ops.Graph().as_default():
        array1 = np.arange(32)
        array2 = np.arange(32, 64)
        df = pd.DataFrame({"a": array1, "b": array2},
                          index=np.arange(64, 96))
        q = ff._enqueue_data(df, capacity=100)
        batch_size = 5
        # Dequeue exactly `batch_size` rows so the expected indices computed
        # below cannot drift from the op.
        dq_op = q.dequeue_many(batch_size)
        with session.Session() as sess:
            coord = coordinator.Coordinator()
            threads = queue_runner_impl.start_queue_runners(
                sess=sess, coord=coord)
            for i in range(100):
                indices = [
                    j % array1.shape[0]
                    for j in range(batch_size * i, batch_size * (i + 1))
                ]
                expected_df_indices = df.index[indices]
                expected_rows = df.iloc[indices]
                dq = sess.run(dq_op)
                # Element 0 of the dequeued tuple is the index; the columns
                # follow in DataFrame column order.
                np.testing.assert_array_equal(expected_df_indices, dq[0])
                for col_num, col in enumerate(df.columns):
                    np.testing.assert_array_equal(
                        expected_rows[col].values, dq[col_num + 1])
            coord.request_stop()
            coord.join(threads)
def testPandasFeeding(self):
    """Checks that PandasSource yields DataFrame rows batch by batch.

    Batches of three rows are read 1000 times. Each batch must match the
    DataFrame's index and columns at the expected positions, which wrap
    around the 32 rows.
    """
    if not HAS_PANDAS:
        # Report the test as skipped rather than silently passing.
        self.skipTest("pandas is not available")
    batch_size = 3
    iterations = 1000
    index = np.arange(100, 132)
    a = np.arange(32)
    b = np.arange(32, 64)
    dataframe = pd.DataFrame({"a": a, "b": b}, index=index)
    pandas_source = in_memory_source.PandasSource(
        dataframe, batch_size=batch_size)
    pandas_columns = pandas_source()
    cache = {}
    with ops.Graph().as_default():
        pandas_tensors = [col.build(cache) for col in pandas_columns]
        with session.Session() as sess:
            coord = coordinator.Coordinator()
            threads = queue_runner_impl.start_queue_runners(
                sess=sess, coord=coord)
            for i in range(iterations):
                indices = [
                    j % dataframe.shape[0]
                    for j in range(batch_size * i, batch_size * (i + 1))
                ]
                expected_df_indices = dataframe.index[indices]
                expected_rows = dataframe.iloc[indices]
                actual_value = sess.run(pandas_tensors)
                # Element 0 is compared against the index; the columns
                # follow in DataFrame column order.
                np.testing.assert_array_equal(expected_df_indices,
                                              actual_value[0])
                for col_num, col in enumerate(dataframe.columns):
                    np.testing.assert_array_equal(
                        expected_rows[col].values,
                        actual_value[col_num + 1])
            coord.request_stop()
            coord.join(threads)
def testNumpySource(self):
    """Checks that NumpySource yields index/value batches in row order.

    Batches of three rows are read 1000 times from a 16x2 array; the
    expected row indices wrap around the 16 rows.
    """
    batch_size = 3
    iterations = 1000
    array = np.arange(32).reshape([16, 2])
    num_rows = array.shape[0]
    numpy_source = in_memory_source.NumpySource(array, batch_size=batch_size)
    index_column = numpy_source().index
    value_column = numpy_source().value
    cache = {}
    with ops.Graph().as_default():
        value_tensor = value_column.build(cache)
        index_tensor = index_column.build(cache)
        with session.Session() as sess:
            coord = coordinator.Coordinator()
            threads = queue_runner_impl.start_queue_runners(
                sess=sess, coord=coord)
            for step in range(iterations):
                first_row = batch_size * step
                expected_index = [
                    row % num_rows
                    for row in range(first_row, first_row + batch_size)
                ]
                expected_value = get_rows(array, expected_index)
                actual_index, actual_value = sess.run(
                    [index_tensor, value_tensor])
                np.testing.assert_array_equal(expected_index, actual_index)
                np.testing.assert_array_equal(expected_value, actual_value)
            coord.request_stop()
            coord.join(threads)
def verify_tfrecord_image(dataset_dir, create_input_fn, channels=3):
    """Displays the first few images produced by each input function.

    Args:
      dataset_dir: Passed to `create_input_fn`.
      create_input_fn: Callable returning an iterable of input functions.
        Each input function must return an `(image, label)` pair of
        mappings with an 'image' and a 'label' entry respectively.
      channels: When greater than 1 the full image slice is shown;
        otherwise only channel 0 is shown.
    """
    import matplotlib.pyplot as plt
    from tensorflow.python.training import coordinator
    from tensorflow.python.training import queue_runner_impl

    def details(img, label):
        print('------image: {}'.format(label))
        plt.imshow(img)
        plt.show()

    create_input_fns = create_input_fn(dataset_dir)
    for input_fn in create_input_fns:
        with tf.Session() as session:
            image, label = input_fn()
            coord = coordinator.Coordinator()
            threads = queue_runner_impl.start_queue_runners(
                session, coord=coord)
            try:
                img, lab = session.run([image['image'], label['label']])
                print('Train data {}'.format(img[:, :, :].shape))
                # `range` works on Python 2 and 3 (`xrange` is Python 2
                # only). The count is capped at the batch length so small
                # batches do not raise IndexError.
                for i in range(min(3, len(img))):
                    if channels > 1:
                        picture = img[i, :, :, :]
                    else:
                        picture = img[i, :, :, 0]
                    details(picture, lab[i])
            finally:
                # Always stop the queue threads, even if running or
                # plotting failed.
                coord.request_stop()
                coord.join(threads)
def testNumpyInputFnWithYAsDict(self):
    """Checks numpy_input_fn batches when the targets are given as a dict.

    Four rows with batch size 2 and one epoch give two batches; a third
    read must raise OutOfRangeError.
    """
    x = {'a': np.arange(4) * 1.0, 'b': np.arange(32, 36)}
    y = {'y1': np.arange(-32, -28), 'y2': np.arange(32, 28, -1)}

    with self.test_session() as session:
        input_fn = numpy_io.numpy_input_fn(
            x, y, batch_size=2, shuffle=False, num_epochs=1)
        features_tensor, targets_tensor = input_fn()
        fetches = [features_tensor, targets_tensor]

        coord = coordinator.Coordinator()
        threads = queue_runner_impl.start_queue_runners(session, coord=coord)

        features, targets = session.run(fetches)
        self.assertEqual(len(features), 2)
        self.assertAllEqual(features['a'], [0, 1])
        self.assertAllEqual(features['b'], [32, 33])
        self.assertEqual(len(targets), 2)
        self.assertAllEqual(targets['y1'], [-32, -31])
        self.assertAllEqual(targets['y2'], [32, 31])

        # Second batch is consumed unchecked; the read after it must fail.
        session.run([features_tensor, targets_tensor])
        with self.assertRaises(errors.OutOfRangeError):
            session.run([features_tensor, targets_tensor])

        coord.request_stop()
        coord.join(threads)
def testNumpyInputFn(self):
    """Checks the first batch of numpy_input_fn and its end of input.

    Four rows with batch size 2 and one epoch give two batches; a third
    read must raise OutOfRangeError.
    """
    x = {'a': np.arange(4) * 1.0, 'b': np.arange(32, 36)}
    y = np.arange(-32, -28)

    with self.test_session() as session:
        input_fn = numpy_io.numpy_input_fn(
            x, y, batch_size=2, shuffle=False, num_epochs=1)
        features, target = input_fn()

        coord = coordinator.Coordinator()
        threads = queue_runner_impl.start_queue_runners(session, coord=coord)

        first_batch = session.run([features, target])
        self.assertAllEqual(first_batch[0]['a'], [0, 1])
        self.assertAllEqual(first_batch[0]['b'], [32, 33])
        self.assertAllEqual(first_batch[1], [-32, -31])

        # Second batch is consumed unchecked; the read after it must fail.
        session.run([features, target])
        with self.assertRaises(errors.OutOfRangeError):
            session.run([features, target])

        coord.request_stop()
        coord.join(threads)
def test_linear_model_numpy_input_fn(self):
    """Checks a linear model fed by numpy_input_fn.

    Weights and bias are assigned known values, after which the first
    batch of two rows must give the expected sums.
    """
    price = fc.numeric_column('price')
    price_buckets = fc.bucketized_column(price, boundaries=[0., 10., 100.,])
    body_style = fc.categorical_column_with_vocabulary_list(
        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])

    raw_inputs = {
        'price': np.array([-1., 2., 13., 104.]),
        'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']),
    }
    input_fn = numpy_io.numpy_input_fn(
        x=raw_inputs, batch_size=2, shuffle=False)
    features = input_fn()
    net = fc.linear_model(features, [price_buckets, body_style])

    with self._initialized_session() as sess:
        coord = coordinator.Coordinator()
        threads = queue_runner_impl.start_queue_runners(sess, coord=coord)

        bias = self._get_linear_model_bias()
        price_buckets_var = self._get_linear_model_column_var(price_buckets)
        body_style_var = self._get_linear_model_column_var(body_style)

        bucket_weights = [[10.], [100.], [1000.], [10000.]]
        style_weights = [[-10.], [-100.], [-1000.]]
        sess.run(price_buckets_var.assign(bucket_weights))
        sess.run(body_style_var.assign(style_weights))
        sess.run(bias.assign([5.]))

        expected = [[10 - 1000 + 5.], [100 - 10 + 5.]]
        self.assertAllClose(expected, sess.run(net))

        coord.request_stop()
        coord.join(threads)
def _test(self):
    """Checks a binary series method against the equivalent op.

    `fn_name`, `op` and `SCALAR` are free names resolved outside this
    function (not visible here). The method named `fn_name` is applied to
    the "value" series, once with another series and once with `SCALAR`.
    Both results must match `op` applied to the built tensors.
    """
    rng = np.arange(
        -NUMPY_ARRAY_SIZE // 2, NUMPY_ARRAY_SIZE // 2, dtype="float32")

    frame = df.TensorFlowDataFrame.from_numpy(
        rng, batch_size=len(rng), shuffle=False)

    frame["sqr"] = frame["value"].square()

    self.assertTrue(hasattr(frame["value"], fn_name))

    frame["series_result"] = getattr(frame["value"], fn_name)(frame["sqr"])
    frame["scalar_result"] = getattr(frame["value"], fn_name)(SCALAR)

    frame_built = frame.build()

    expected_series_tensor = op(frame_built["value"], frame_built["sqr"])
    actual_series_tensor = frame_built["series_result"]

    expected_scalar_tensor = op(frame_built["value"], SCALAR)
    actual_scalar_tensor = frame_built["scalar_result"]

    # The session is used as a context manager so it is closed when the
    # test finishes instead of being left open.
    with session_lib.Session() as session:
        coord = coordinator.Coordinator()
        threads = queue_runner_impl.start_queue_runners(
            sess=session, coord=coord)
        actual_series, expected_series, actual_scalar, expected_scalar = (
            session.run([
                actual_series_tensor, expected_series_tensor,
                actual_scalar_tensor, expected_scalar_tensor
            ]))
        coord.request_stop()
        coord.join(threads)

    np.testing.assert_almost_equal(expected_series, actual_series)
    np.testing.assert_almost_equal(expected_scalar, actual_scalar)
def testNumpyInputFnWithYIsNone(self):
    """Checks numpy_input_fn output when no targets are supplied.

    With `y=None` the input function's result is used directly as the
    features. Four rows with batch size 2 and one epoch give two batches;
    a third read must raise OutOfRangeError.
    """
    x = {'a': np.arange(4) * 1.0, 'b': np.arange(32, 36)}
    y = None

    with self.test_session() as session:
        input_fn = numpy_io.numpy_input_fn(
            x, y, batch_size=2, shuffle=False, num_epochs=1)
        features_tensor = input_fn()

        coord = coordinator.Coordinator()
        threads = queue_runner_impl.start_queue_runners(session, coord=coord)

        first_batch = session.run(features_tensor)
        self.assertEqual(len(first_batch), 2)
        self.assertAllEqual(first_batch['a'], [0, 1])
        self.assertAllEqual(first_batch['b'], [32, 33])

        # Second batch is consumed unchecked; the read after it must fail.
        session.run([features_tensor])
        with self.assertRaises(errors.OutOfRangeError):
            session.run([features_tensor])

        coord.request_stop()
        coord.join(threads)
def testNumpyInputFnWithBatchSizeNotDividedByDataSize(self):
    """Checks that a trailing partial batch is emitted by numpy_input_fn.

    Five rows with batch size 2 give batches of 2, 2 and 1 rows; a fourth
    read must raise OutOfRangeError.
    """
    batch_size = 2
    x = {'a': np.arange(5) * 1.0, 'b': np.arange(32, 37)}
    y = np.arange(-32, -27)

    # Expected (a, b, target) for each successive batch.
    expected_batches = (
        ([0, 1], [32, 33], [-32, -31]),
        ([2, 3], [34, 35], [-30, -29]),
        ([4], [36], [-28]),
    )

    with self.test_session() as session:
        input_fn = numpy_io.numpy_input_fn(
            x, y, batch_size=batch_size, shuffle=False, num_epochs=1)
        features, target = input_fn()

        coord = coordinator.Coordinator()
        threads = queue_runner_impl.start_queue_runners(session, coord=coord)

        for expected_a, expected_b, expected_target in expected_batches:
            batch = session.run([features, target])
            self.assertAllEqual(batch[0]['a'], expected_a)
            self.assertAllEqual(batch[0]['b'], expected_b)
            self.assertAllEqual(batch[1], expected_target)

        with self.assertRaises(errors.OutOfRangeError):
            session.run([features, target])

        coord.request_stop()
        coord.join(threads)
def testBatch(self):
    """Checks that Batch regroups a source's rows into a new batch size.

    The source emits batches of 7 and the Batch transform is configured
    for 13. Fifty re-batched reads must return consecutive indices and
    values.
    """
    initial_batch_size = 7
    final_batch_size = 13
    iterations = 50
    value_offset = 1000

    source = in_memory_source.NumpySource(
        np.arange(1000, 2000), batch_size=initial_batch_size)
    numpy_cols = source()
    index_column = numpy_cols.index
    value_column = numpy_cols.value
    batcher = batch.Batch(
        batch_size=final_batch_size, output_names=["index", "value"])
    batched = batcher([index_column, value_column])
    cache = {}
    index_tensor = batched.index.build(cache)
    value_tensor = batched.value.build(cache)
    with self.test_session() as sess:
        coord = coordinator.Coordinator()
        threads = queue_runner_impl.start_queue_runners(
            sess=sess, coord=coord)
        for step in range(iterations):
            start = step * final_batch_size
            stop = (step + 1) * final_batch_size
            expected_index = range(start, stop)
            expected_value = range(value_offset + start, value_offset + stop)
            actual_index, actual_value = sess.run(
                [index_tensor, value_tensor])
            np.testing.assert_array_equal(expected_index, actual_index)
            np.testing.assert_array_equal(expected_value, actual_value)
        coord.request_stop()
        coord.join(threads)
def test_keyed_features_filter(self):
    """Checks keyed batch reading with a filter on the parsed "age" feature.

    Six JSON examples are written to a temporary file; only those whose
    age is below 2 pass the filter. The surviving examples arrive as one
    batch of two and one batch of one, after which the input is exhausted.
    """
    gfile.Glob = self._orig_glob
    lines = [
        '{"features": {"feature": {"age": {"int64_list": {"value": [2]}}}}}',
        '{"features": {"feature": {"age": {"int64_list": {"value": [0]}}}}}',
        '{"features": {"feature": {"age": {"int64_list": {"value": [1]}}}}}',
        '{"features": {"feature": {"age": {"int64_list": {"value": [0]}}}}}',
        '{"features": {"feature": {"age": {"int64_list": {"value": [3]}}}}}',
        '{"features": {"feature": {"age": {"int64_list": {"value": [5]}}}}}'
    ]
    filename = self._create_temp_file("\n".join(lines))

    batch_size = 2
    queue_capacity = 4
    name = "my_batch"
    features = {"age": parsing_ops.FixedLenFeature([], dtypes_lib.int64)}

    def filter_fn(keys, examples_json):
        """Keeps examples whose parsed age is less than 2."""
        del keys
        serialized = parsing_ops.decode_json_example(examples_json)
        examples = parsing_ops.parse_example(serialized, features)
        return math_ops.less(examples["age"], 2)

    with ops.Graph().as_default() as g, self.session(graph=g) as session:
        keys, inputs = graph_io._read_keyed_batch_examples_helper(
            filename,
            batch_size,
            reader=io_ops.TextLineReader,
            randomize_input=False,
            num_epochs=1,
            read_batch_size=batch_size,
            queue_capacity=queue_capacity,
            filter_fn=filter_fn,
            name=name)
        self.assertAllEqual((None,), keys.get_shape().as_list())
        self.assertAllEqual((None,), inputs.get_shape().as_list())
        session.run(variables.local_variables_initializer())

        coord = coordinator.Coordinator()
        threads = queue_runner_impl.start_queue_runners(session, coord=coord)

        # First batch of two filtered examples.
        out_keys, out_vals = session.run((keys, inputs))
        first_expected_keys = [
            filename.encode("utf-8") + b":2",
            filename.encode("utf-8") + b":3",
        ]
        first_expected_vals = [
            lines[1].encode("utf-8"),
            lines[2].encode("utf-8"),
        ]
        self.assertAllEqual(first_expected_keys, out_keys)
        self.assertAllEqual(first_expected_vals, out_vals)

        # Second batch will only have one filtered example as that's the
        # only remaining example that satisfies the filtering criterion.
        out_keys, out_vals = session.run((keys, inputs))
        self.assertAllEqual([filename.encode("utf-8") + b":4"], out_keys)
        self.assertAllEqual([lines[3].encode("utf-8")], out_vals)

        # Exhausted input.
        with self.assertRaises(errors.OutOfRangeError):
            session.run((keys, inputs))

        coord.request_stop()
        coord.join(threads)
def testGeneratorInputFn(self):
    """Checks the single batch produced by generator_input_fn.

    The generator yields two records and the batch size is 2. After the
    first batch and one further features-only read, a full read must raise
    OutOfRangeError.
    """

    def generator():
        for index in range(2):
            record = {
                'a': np.ones(1) * index,
                'b': np.ones(1) * index + 32,
                'label': np.ones(1) * index - 32
            }
            yield record

    with self.cached_session() as session:
        input_fn = generator_io.generator_input_fn(
            generator,
            target_key='label',
            batch_size=2,
            shuffle=False,
            num_epochs=1)
        features, target = input_fn()

        coord = coordinator.Coordinator()
        threads = queue_runner_impl.start_queue_runners(session, coord=coord)

        batch = session.run([features, target])
        expected_a = np.asarray([0, 1]).reshape(-1, 1)
        expected_b = np.asarray([32, 33]).reshape(-1, 1)
        expected_target = np.asarray([-32, -31]).reshape(-1, 1)
        self.assertAllEqual(batch[0]['a'], expected_a)
        self.assertAllEqual(batch[0]['b'], expected_b)
        self.assertAllEqual(batch[1], expected_target)

        session.run([features])
        with self.assertRaises(errors.OutOfRangeError):
            session.run([features, target])

        coord.request_stop()
        coord.join(threads)
def testGeneratorInputFnWithDifferentDimensionsOfFeatures(self):
    """Checks generator_input_fn with features of differing 2-D shapes.

    The first batch of two records must stack the per-record arrays for
    'a' (10x10), 'b' (5x5) and the label (3x3).
    """

    def generator():
        for index in range(100):
            record = {
                'a': np.ones((10, 10)) * index,
                'b': np.ones((5, 5)) * index + 32,
                'label': np.ones((3, 3)) * index - 32
            }
            yield record

    with self.cached_session() as session:
        input_fn = generator_io.generator_input_fn(
            generator,
            target_key='label',
            batch_size=2,
            shuffle=False,
            num_epochs=1)
        features, target = input_fn()

        coord = coordinator.Coordinator()
        threads = queue_runner_impl.start_queue_runners(session, coord=coord)

        batch = session.run([features, target])
        expected_a = np.vstack(
            (np.zeros((10, 10)), np.ones((10, 10)))).reshape(2, 10, 10)
        expected_b = np.vstack(
            (np.zeros((5, 5)), np.ones((5, 5)))).reshape(2, 5, 5) + 32
        expected_target = np.vstack(
            (np.zeros((3, 3)), np.ones((3, 3)))).reshape(2, 3, 3) - 32
        self.assertAllEqual(batch[0]['a'], expected_a)
        self.assertAllEqual(batch[0]['b'], expected_b)
        self.assertAllEqual(batch[1], expected_target)

        coord.request_stop()
        coord.join(threads)
def testNotAMultiple(self):
    """Checks the error raised when pad=False and lengths don't fit num_unroll.

    With `num_unroll=3` and `pad=False`, the block must raise
    InvalidArgumentError with a message advising `pad=True`.
    """
    num_unroll = 3  # Not a divisor of value_length -
    # so padding would have been necessary.
    with self.test_session() as sess:
        # `assertRaisesRegex` replaces the deprecated `assertRaisesRegexp`
        # alias, which no longer exists on Python 3.12+.
        with self.assertRaisesRegex(errors_impl.InvalidArgumentError,
                                    ".*should be a multiple of: 3, but saw "
                                    "value: 4. Consider setting pad=True."):
            coord = coordinator.Coordinator()
            threads = None
            try:
                with coord.stop_on_exception():
                    next_batch = sqss.batch_sequences_with_states(
                        input_key=self.key,
                        input_sequences=self.sequences,
                        input_context=self.context,
                        input_length=3,
                        initial_states=self.initial_states,
                        num_unroll=num_unroll,
                        batch_size=self.batch_size,
                        num_threads=3,
                        # to enforce that we only move on to the next
                        # examples after finishing all segments of the
                        # first ones.
                        capacity=2,
                        pad=False)
                    threads = queue_runner_impl.start_queue_runners(
                        coord=coord)
                    sess.run([next_batch.key])
            except errors_impl.OutOfRangeError:
                # Deliberately ignored: the surrounding assertion targets
                # InvalidArgumentError, not exhaustion of the input.
                pass
            finally:
                coord.request_stop()
                # `threads` stays None if building the batch op failed
                # before any runner was started.
                if threads is not None:
                    coord.join(threads, stop_grace_period_secs=2)
def testPandasInputFn_ProducesOutputsForLargeBatchAndMultipleEpochs(self):
    """Checks pandas_input_fn when the batch size exceeds the total data.

    Two rows over two epochs with a batch size of 128 must arrive as one
    batch of four rows; a second read must raise OutOfRangeError.
    """
    if not HAS_PANDAS:
        # Report the test as skipped rather than silently passing.
        self.skipTest("pandas is not available")
    with self.test_session() as session:
        index = np.arange(100, 102)
        a = np.arange(2)
        b = np.arange(32, 34)
        x = pd.DataFrame({'a': a, 'b': b}, index=index)
        y = pd.Series(np.arange(-32, -30), index=index)
        input_fn = pandas_io.pandas_input_fn(
            x, y, batch_size=128, shuffle=False, num_epochs=2)

        results = input_fn()

        coord = coordinator.Coordinator()
        threads = queue_runner_impl.start_queue_runners(session, coord=coord)

        features, target = session.run(results)
        self.assertAllEqual(features['a'], [0, 1, 0, 1])
        self.assertAllEqual(features['b'], [32, 33, 32, 33])
        self.assertAllEqual(target, [-32, -31, -32, -31])

        with self.assertRaises(errors.OutOfRangeError):
            session.run(results)

        coord.request_stop()
        coord.join(threads)
def test_batch_text_lines(self):
    """Checks read_batch_examples on a five-line text file.

    With a batch size of 3 the lines arrive as ["A", "B", "C"] and then
    ["D", "E"]; a third read must raise OutOfRangeError.
    """
    gfile.Glob = self._orig_glob
    filename = self._create_temp_file("A\nB\nC\nD\nE\n")

    batch_size = 3
    queue_capacity = 10
    name = "my_batch"

    with ops.Graph().as_default() as g, self.test_session(graph=g) as session:
        inputs = graph_io.read_batch_examples(
            [filename],
            batch_size,
            reader=io_ops.TextLineReader,
            randomize_input=False,
            num_epochs=1,
            queue_capacity=queue_capacity,
            read_batch_size=10,
            name=name)
        self.assertAllEqual((None,), inputs.get_shape().as_list())
        session.run(variables.local_variables_initializer())

        coord = coordinator.Coordinator()
        threads = queue_runner_impl.start_queue_runners(session, coord=coord)

        first_batch = session.run(inputs)
        self.assertAllEqual(first_batch, [b"A", b"B", b"C"])
        second_batch = session.run(inputs)
        self.assertAllEqual(second_batch, [b"D", b"E"])
        with self.assertRaises(errors.OutOfRangeError):
            session.run(inputs)

        coord.request_stop()
        coord.join(threads)
def testGeneratorInputFnWithMismatchinGeneratorKeys(self):
    """Checks the error raised when generator records have differing keys.

    The second record uses key 'c' where the first used 'b'. Reading must
    raise OutOfRangeError, and stopping and joining the threads must raise
    a KeyError describing the mismatch.
    """

    def generator():
        first_index = 0
        yield {
            'a': np.ones(1) * first_index,
            'b': np.ones(1) * first_index + 32,
            'label': np.ones(1) * first_index - 32
        }
        second_index = 1
        yield {
            'a': np.ones(1) * second_index,
            'c': np.ones(1) * second_index + 32,
            'label': np.ones(1) * second_index - 32
        }

    with self.cached_session() as session:
        input_fn = generator_io.generator_input_fn(
            generator,
            target_key=None,
            batch_size=2,
            shuffle=False,
            num_epochs=1)
        features = input_fn()

        coord = coordinator.Coordinator()
        threads = queue_runner_impl.start_queue_runners(session, coord=coord)

        with self.assertRaises(errors.OutOfRangeError):
            session.run([features])

        with self.assertRaisesRegex(KeyError,
                                    'key mismatch between dicts emitted'
                                    ' by GenFunExpected'):
            coord.request_stop()
            coord.join(threads)
def start_queue_runners(self, sess):
  """Starts the graph's queue runners on `sess` under this object's coordinator.

  Args:
    sess: Session the queue-runner threads should run their enqueue ops in.
      If None, the underlying call falls back to the default session.

  The started threads are kept in `self._threads`.
  """
  # Store session to be able to close inputs later
  if self._sess is None:
    self._sess = sess
  # Forward the session explicitly. Previously `sess` was accepted but not
  # passed on, so the runners only worked when a default session happened to
  # be registered (and ran in that session rather than in `sess`).
  self._threads = queue_runner_impl.start_queue_runners(
      sess=sess, coord=self._coord)
def _testBucketBySequenceLength(self,
                                allow_small_batch,
                                bucket_capacities=None,
                                drain_entire_queue=True):
  """Feeds random (length, data, labels) triples through length bucketing.

  Enqueues `50 * batch_size + 100` randomly chosen triples into a FIFO queue,
  closes it, and then reads batches produced by
  `bucket_ops.bucket_by_sequence_length`, checking shapes and that every row of
  a batch is internally consistent.

  Args:
    allow_small_batch: Forwarded as `allow_smaller_final_batch`. When true the
      static batch dimension is expected to be None and batches may be smaller
      than `batch_size`; otherwise every batch must be exactly `batch_size`.
    bucket_capacities: Forwarded unchanged to `bucket_by_sequence_length`.
    drain_entire_queue: When true, keep reading until OutOfRangeError. When
      false, stop reading once at least `40 * batch_size` rows were dequeued.
  """
  ops.reset_default_graph()

  # All inputs must be identical lengths across tuple index.
  # The input reader will get input_length from the first tuple
  # entry.
  data_len = 4
  labels_len = 3
  # Each candidate is (length, (data, labels)) where data repeats the length
  # as int64 and labels repeats its ASCII-encoded string form.
  input_pairs = [(length, ([np.int64(length)] * data_len,
                           [str(length).encode("ascii")] * labels_len))
                 for length in (1, 3, 4, 5, 6, 10)]

  lengths = array_ops.placeholder(dtypes_lib.int32, ())
  data = array_ops.placeholder(dtypes_lib.int64, (data_len,))
  labels = array_ops.placeholder(dtypes_lib.string, (labels_len,))

  batch_size = 8
  bucket_boundaries = [3, 4, 5, 10]
  num_pairs_to_enqueue = 50 * batch_size + 100

  # Make capacity very large so we can feed all the inputs in the
  # main thread without blocking
  input_queue = data_flow_ops.FIFOQueue(
      5000, (dtypes_lib.int32, dtypes_lib.int64, dtypes_lib.string), (
          (), (data_len,), (labels_len,)))
  input_enqueue_op = input_queue.enqueue((lengths, data, labels))
  lengths_t, data_t, labels_t = input_queue.dequeue()
  close_input_op = input_queue.close()

  (out_lengths_t, data_and_labels_t) = (bucket_ops.bucket_by_sequence_length(
      input_length=lengths_t,
      tensors=[data_t, labels_t],
      batch_size=batch_size,
      bucket_boundaries=bucket_boundaries,
      bucket_capacities=bucket_capacities,
      allow_smaller_final_batch=allow_small_batch,
      num_threads=10))

  # The static batch dimension is only known when short batches are disallowed.
  expected_batch_size = None if allow_small_batch else batch_size
  self.assertEqual(out_lengths_t.get_shape().as_list(), [expected_batch_size])
  self.assertEqual(data_and_labels_t[0].get_shape().as_list(),
                   [expected_batch_size, data_len])
  self.assertEqual(data_and_labels_t[1].get_shape().as_list(),
                   [expected_batch_size, labels_len])

  def _read_test(sess):
    """Reads batches from the bucketed output and validates each of them."""
    num_pairs_dequeued = 0
    try:
      while drain_entire_queue or num_pairs_dequeued < 40 * batch_size:
        (out_lengths, (data, labels)) = sess.run(
            (out_lengths_t, data_and_labels_t))
        num_pairs_dequeued += out_lengths.shape[0]
        if allow_small_batch:
          self.assertEqual(data_len, data.shape[1])
          self.assertEqual(labels_len, labels.shape[1])
          self.assertGreaterEqual(batch_size, out_lengths.shape[0])
          self.assertGreaterEqual(batch_size, data.shape[0])
          self.assertGreaterEqual(batch_size, labels.shape[0])
        else:
          self.assertEqual((batch_size, data_len), data.shape)
          self.assertEqual((batch_size, labels_len), labels.shape)
          self.assertEqual((batch_size,), out_lengths.shape)
        for (lr, dr, tr) in zip(out_lengths, data, labels):
          # Make sure length matches data (here it's the same value).
          self.assertEqual(dr[0], lr)
          # Make sure data & labels match.
          self.assertEqual(dr[0], int(tr[0].decode("ascii")))
          # Make sure for each row, data came from the same bucket.
          self.assertEqual(
              _which_bucket(bucket_boundaries, dr[0]),
              _which_bucket(bucket_boundaries, dr[1]))
    except errors.OutOfRangeError:
      # The closed input queue has been drained; check how much came out.
      if allow_small_batch:
        self.assertEqual(num_pairs_to_enqueue, num_pairs_dequeued)
      else:
        # Maximum left over in the queues should be at most one less than the
        # batch_size, for every bucket.
        num_buckets = len(bucket_boundaries) + 2
        self.assertLessEqual(
            num_pairs_to_enqueue - (batch_size - 1) * num_buckets,
            num_pairs_dequeued)

  with self.cached_session() as sess:
    coord = coordinator.Coordinator()

    # Feed the inputs, then close the input thread.
    for _ in range(num_pairs_to_enqueue):
      which = random.randint(0, len(input_pairs) - 1)
      length, pair = input_pairs[which]
      sess.run(input_enqueue_op,
               feed_dict={lengths: length,
                          data: pair[0],
                          labels: pair[1]})
    sess.run(close_input_op)

    # Start the queue runners
    # No session is passed here, so the call relies on `sess` being the
    # default session inside this `with` block.
    threads = queue_runner_impl.start_queue_runners(coord=coord)
    # Read off the top of the bucket and ensure correctness of output
    _read_test(sess)
    coord.request_stop()
    coord.join(threads)
def train():
    """Trains a bag-of-embeddings multi-label classifier, then validates it.

    Builds two `read_batch_features` pipelines (training and validation) from
    the TFRecord paths in FLAGS, trains with Adam on a sigmoid cross-entropy
    loss until the training pipeline raises OutOfRangeError, and then prints
    mean precision/recall (cutoff 3) over the validation batches.

    All configuration is read from the module-level FLAGS object; nothing is
    returned.
    """
    # Count vocabulary lines without leaking the file handle.
    with open(FLAGS.vocab_file) as vocab_file:
        vocab_size = sum(1 for _ in vocab_file)
    id_to_label = load_id_to_label()
    num_label = len(id_to_label)
    print('#vocab={} #label={}'.format(vocab_size, num_label))

    parse_spec = get_parse_spec(FLAGS.use_ngrams, num_label)
    features = tf.contrib.learn.read_batch_features(
        FLAGS.train_tfrecord,
        FLAGS.batch_size,
        parse_spec,
        tf.TFRecordReader,
        num_epochs=FLAGS.num_epochs,
        reader_num_threads=FLAGS.num_threads)
    text_ts = tf.sparse_tensor_to_dense(
        features[TEXT_KEY], default_value=DEFAULT_WORD)
    label_ts = features.pop(LABELS_KEY)

    # The model consumes placeholders; batches are fetched from the input
    # pipeline in one run call and fed back in a second one.
    text_ph = tf.placeholder(tf.int64, shape=(None, None))
    label_ph = tf.placeholder(tf.float32, shape=(None, num_label))
    text_ids = text_ph

    # One extra embedding row beyond the vocabulary size.
    text_embedding_w = tf.Variable(
        tf.random_uniform([vocab_size + 1, FLAGS.embedding_dimension],
                          -0.1, 0.1))
    text_embedding = tf.reduce_mean(
        tf.nn.embedding_lookup(text_embedding_w, text_ids), axis=-2)
    input_layer = text_embedding
    logits_ts = tf.contrib.layers.fully_connected(
        inputs=input_layer, num_outputs=num_label, activation_fn=None)
    loss_ts = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            labels=label_ph, logits=logits_ts))

    optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
    train_op = optimizer.minimize(
        loss_ts, global_step=tf.train.get_global_step())

    var_init = tf.global_variables_initializer()
    tab_init = tf.tables_initializer()
    tf.summary.scalar('loss', loss_ts)
    summary_op = tf.summary.merge_all()

    features_v = tf.contrib.learn.read_batch_features(
        FLAGS.valid_tfrecord,
        FLAGS.batch_size,
        parse_spec,
        tf.TFRecordReader,
        num_epochs=1,
        reader_num_threads=FLAGS.num_threads)
    text_ts_v = tf.sparse_tensor_to_dense(
        features_v[TEXT_KEY], default_value=DEFAULT_WORD)
    label_ts_v = features_v.pop(LABELS_KEY)

    from tensorflow.python.framework import errors
    from tensorflow.python.ops import variables
    from tensorflow.python.training import coordinator
    from tensorflow.python.training import queue_runner_impl

    with tf.Session() as sess:
        writer = tf.summary.FileWriter(
            FLAGS.logs_dir, graph=tf.get_default_graph())
        sess.run(variables.local_variables_initializer())
        coord = coordinator.Coordinator()
        threads = queue_runner_impl.start_queue_runners(sess, coord=coord)
        sess.run(var_init)
        sess.run(tab_init)

        total_size = 0
        try:
            while not coord.should_stop():
                for train_step in range(1000000):
                    text_np, label_np = sess.run([text_ts, label_ts])
                    total_size += FLAGS.batch_size
                    feed_dict = {text_ph: text_np, label_ph: label_np}
                    _, loss, summary = sess.run(
                        [train_op, loss_ts, summary_op], feed_dict=feed_dict)
                    if (train_step + 1) % 100 == 0:
                        writer.add_summary(summary, train_step)
                        print('#{0} loss={1:.4f}'.format(train_step, loss))
        except errors.OutOfRangeError:
            # Training data exhausted: report the total and run validation.
            print('total={}'.format(total_size))

            cutoff = 3
            prec_v, rec_v = [], []
            for _ in range(int(2000 / FLAGS.batch_size)):
                try:
                    text_np, label_np = sess.run([text_ts_v, label_ts_v])
                except errors.OutOfRangeError:
                    # The validation set is smaller than the fixed batch
                    # budget; report on what was read instead of crashing.
                    break
                feed_dict = {text_ph: text_np, label_ph: label_np}
                logits, = sess.run([logits_ts], feed_dict=feed_dict)
                prec_v.append(precision(logits, label_np, cutoff))
                rec_v.append(recall(logits, label_np, cutoff))
            prec_v, rec_v = np.mean(prec_v), np.mean(rec_v)
            print('prec={0:.4f} rec={1:.4f}'.format(prec_v, rec_v))
        finally:
            # Flush pending summaries to disk and shut down the reader threads.
            writer.close()
            coord.request_stop()
            coord.join(threads)
def train(model, input_dims, output_dims, seq_length, size, num_gpus, dataset, experiment_name, load_model, num_vids, n_epochs, split, base_data_path, f_name, learning_rate_init, wd, save_freq, clip_length, video_offset, clip_offset, num_clips, clip_stride, batch_size, loss_type, metrics_dir, loaded_checkpoint, verbose, opt_choice, gpu_list, grad_clip_value, preproc_method, random_init, shuffle_seed, preproc_debugging, reverse): """ Training function used to train or fine-tune a chosen model Args: :model: tf-activity-recognition framework model object :input_dims: Number of frames used in input :output_dims: Integer number of classes in current dataset :seq_length: Length of output sequence expected from LSTM :size: List detailing height and width of frame :num_gpus: Number of gpus to use when training :dataset: Name of dataset being processed :experiment_name: Name of current experiment :load_model: Boolean variable indicating whether to load from a checkpoint or not :num_vids: Number of videos to be used for training :n_epochs: Total number of epochs to train :split: Split of dataset being used :base_data_path: Full path to root directory containing datasets :f_name: Specific video directory within a chosen split of a dataset :learning_rate_init: Initializer for learning rate :wd: Weight decay :save_freq: Frequency, in epochs, with which to save :clip_length: Length of clips to cut video into, -1 indicates using the entire video as one clip') :video_offset: String indicating where to begin selecting video clips (provided clipOffset is None) :clip_offset: "none" or "random" indicating where to begin selecting video clips :num_clips: Number of clips to break video into :clip_stride: Number of frames that overlap between clips, 0 indicates no overlap and negative values indicate a gap of frames between clips :batch_size: Number of clips to load into the model each step. 
:loss_type: String declaring loss type associated with a chosen model :metrics_dir: Name of subdirectory within the experiment to store metrics. Unique directory names allow for parallel testing :loaded_checkpoint: Specify the exact checkpoint of saved model to be loaded for further training/testing :verbose: Boolean to indicate if all print statement should be procesed or not :opt_choice: String indicating optimizer selected :gpu_list: List of GPU IDs to be used :grad_clip_value: Float value at which to clip normalized gradients :lr_boundaries: List of epoch boundaries at which lr will be updated :lr_values: List of lr multipliers to learning_rate_init at boundaries mentioned in lr_boundaries :preproc_method: The preprocessing method to use, default, cvr, rr, sr, or any other custom preprocessing :random_init: Randomly initialize model weights, not loading from any files (deafult False) :preproc_debugging: Boolean indicating whether to load videos and clips in a queue or to load them directly for debugging (Default 0) :reverse: Boolean indicating whether reverse videos and classify them as a new action class. Returns: Does not return anything """ with tf.name_scope("my_scope") as scope: # Initializers for checkpoint and global step variable ckpt = None gs_init = 0 ################################### Checkpoint loading block ####################################################### # Load pre-trained/saved model to continue training (or fine-tune) if load_model: try: ckpt, gs_init, learning_rate_init = load_checkpoint( model.name, dataset, experiment_name, loaded_checkpoint, preproc_method) if verbose: print 'A better checkpoint is found. The global_step value is: ' + str( gs_init) except: if verbose: print "Failed loading checkpoint requested. Please check." 
exit() # END TRY else: ckpt = model.load_default_weights() # END IF ###################################################################################################################### # Initialize model variables global_step = tf.Variable(gs_init, name='global_step', trainable=False) number_of_videos = tf.Variable(num_vids, name='number_of_videos', trainable=False) number_of_epochs = tf.Variable(n_epochs, name='number_of_epochs', trainable=False) video_step = tf.Variable(1.0, name='video_step', trainable=False) istraining = True reuse_variables = None # TF session setup config = tf.ConfigProto( allow_soft_placement=True ) #, gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)) sess = tf.Session(config=config) init = tf.global_variables_initializer() # Variables get randomly initialized into tf graph sess.run(init) tower_losses = [] tower_grads = [] tower_slogits = [] data_path = os.path.join(base_data_path, 'tfrecords_' + dataset, 'Split' + str(split), f_name) # Setup tensors for models # input_data_tensor - [batchSize, inputDims, height, width, channels] input_data_tensor, labels_tensor, names_tensor = load_dataset( model, num_gpus, batch_size, output_dims, input_dims, seq_length, size, data_path, dataset, istraining, clip_length, video_offset, clip_offset, num_clips, clip_stride, video_step, preproc_debugging, shuffle_seed, verbose, reverse=reverse) ############### TO DO: FIX THIS ASAP ######################## if ((batch_size == 1) and (num_clips == 1)): sess.run(tf.assign_add(video_step, -2)) else: sess.run(tf.assign_add(video_step, -1)) # END IF ############################################################ learning_rate = tf.Variable(learning_rate_init, name='learning_rate', trainable=False) # Define optimizer (Current selection is only momentum optimizer) if opt_choice == 'gd': optimizer = lambda lr: tf.train.GradientDescentOptimizer(lr) elif opt_choice == 'adam': optimizer = lambda lr: tf.train.AdamOptimizer(lr) else: optimizer = lambda lr: 
tf.train.MomentumOptimizer(learning_rate=lr, momentum=0.9) # END IF """ Multi-GPU setup: 1) Associate gpu device to specific model replica 2) Setup tower name scope for variables """ ################# GPU list check block #################### assert ((len(gpu_list) == num_gpus) or (len(gpu_list) == 0)) if len(gpu_list) == 0: gpu_list = [str(x) for x in range(num_gpus)] # END IF ########################################################### ################################################## Setup TF graph block ###################################################### for gpu_idx in range(num_gpus): with tf.device('/gpu:' + str(gpu_list[gpu_idx])): with tf.name_scope('%s_%d' % ('tower', int(gpu_list[gpu_idx]))) as scope: with tf.variable_scope(tf.get_variable_scope(), reuse=reuse_variables): returned_layers = model.inference( input_data_tensor[gpu_idx * batch_size:gpu_idx * batch_size + batch_size, :, :, :, :], istraining, input_dims, output_dims, seq_length, scope, return_layer=['logits'], weight_decay=wd) logits = tf.cast(returned_layers[0], tf.float32) # Calculating Softmax for probability outcomes : Can be modified, make function internal to model slogits = tf.nn.softmax(logits) # END WITH reuse_variables = True """ Within GPU mini-batch: 1) Calculate loss, 2) Initialize optimizer with required learning rate and 3) Compute gradients 4) Aggregate losses, gradients and logits """ total_loss = model.loss( logits, labels_tensor[gpu_idx * batch_size:gpu_idx * batch_size + batch_size, :], loss_type) opt = optimizer(learning_rate) gradients = opt.compute_gradients( total_loss, vars_.trainable_variables()) tower_losses.append(total_loss) tower_grads.append(gradients) tower_slogits.append(slogits) # END WITH # END WITH # END FOR """ After: 1) Computing gradients and losses need to be stored and averaged 2) Clip gradients by norm to required value 3) Apply mean gradient updates """ gradients = _average_gradients(tower_grads) gradients, variables = zip(*gradients) 
clipped_gradients, _ = clip_ops.clip_by_global_norm( gradients, grad_clip_value) gradients = list(zip(clipped_gradients, variables)) grad_updates = opt.apply_gradients(gradients, global_step=global_step, name="train") train_op = grad_updates ############################################################################################################################################ if save_bool: ######################### Logger Setup block ###################################### # Logging setup initialization (Naming format: Date, month, hour, minute, second) log_name = ( "exp_train_%s_%s_%s" % (time.strftime("%d_%m_%H_%M_%S"), dataset, experiment_name)) make_dir('results') make_dir(os.path.join('results', model.name)) make_dir(os.path.join('results', model.name, dataset)) make_dir( os.path.join('results', model.name, dataset, preproc_method)) make_dir( os.path.join('results', model.name, dataset, preproc_method, experiment_name)) make_dir( os.path.join('results', model.name, dataset, preproc_method, experiment_name, 'checkpoints')) curr_logger = Logger( os.path.join('logs', model.name, dataset, preproc_method, metrics_dir, log_name)) #################################################################################### # END IF init = tf.global_variables_initializer() coord = tf.train.Coordinator() threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord) # Variables get randomly initialized into tf graph sess.run(init) # Check that weights were loaded or random initializations are requested if ((ckpt == None) or (random_init)): print "Caution: Model weights are not being loaded, using random initialization." 
else: # Model variables initialized from previous saved models initialize_from_dict(sess, ckpt, model.name) # END IF del ckpt # Initialize tracking variables previous_vid_name = "" videos_loaded = 0 tot_count = 0 acc = 0 epoch_count = 0 tot_load_time = 0.0 tot_train_time = 0.0 last_loss = None losses = [] total_pred = [] save_data = [] total_params = [] losses_tracker = [] # Timing test setup time_init = time.time() batch_count = 0 epoch_acc = 0 l_r = learning_rate_init ########################################## Training loop block ################################################################ # Loop epoch number of time over the training set while videos_loaded < n_epochs * num_vids: # Variable to update during epoch intervals if (epoch_count + 1) * num_vids <= videos_loaded < ( epoch_count + 1) * num_vids + num_gpus * batch_size: batch_count = 0 epoch_acc = 0 if epoch_count % save_freq == 0 and tot_count > 0: if save_bool: if verbose: print "Saving..." save_checkpoint(sess, model.name, dataset, experiment_name, preproc_method, l_r, global_step.eval(session=sess)) # END IF epoch_count += 1 # END IF time_pre_train = time.time() ######################################### Running TF training session block ################################## _, loss_train, predictions, gs, labels, vid_names, l_r, track_vars = sess.run( [ train_op, tower_losses, tower_slogits, global_step, labels_tensor, names_tensor, learning_rate, model.get_track_variables() ]) ################################################################################################################ if verbose: print vid_names for name in vid_names: if name != previous_vid_name: videos_loaded += 1 previous_vid_name = name tot_count += 1 ######## Adaptive Learning Rate Control Block ############################ losses_tracker.append(np.mean(loss_train)) if videos_loaded % 10 == 0 and videos_loaded > 0: if last_loss is None: last_loss = sum(losses_tracker) / 10 else: difference_loss = last_loss - 
sum(losses_tracker) / 10 last_loss = sum(losses_tracker) / 10 if abs(difference_loss) < 0.001: learning_rate /= 10 # END IF # END IF if len(losses_tracker) == 10: losses_tracker = [] # END IF # END IF ########################################################################### # Transpose the extracted layers such that the mean is taken across the gpus and over any matrix with more than 1 dimension params_array = [] for key in track_vars.keys(): curr_params = np.array(track_vars[key]) if len(curr_params.shape) > 1: indices = np.arange(len(curr_params.shape)) + 1 indices[-1] = 0 curr_params = curr_params.transpose(indices) params_array.append( np.mean(curr_params, axis=tuple(range(len(curr_params.shape))[1:]))) else: params_array.append([np.mean(curr_params)]) # END IF # END FOR #################### Training accuracy computation block ############### # Compute training epoch accuracy for gpu_pred_idx in range(len(predictions)): for batch_idx in range(predictions[gpu_pred_idx].shape[0]): pred = np.mean(predictions[gpu_pred_idx][batch_idx], 0).argmax() if pred == labels[gpu_pred_idx * batch_size + batch_idx][0]: epoch_acc += 1 # END IF batch_count += 1 # END FOR # END FOR ###################### Add variables to be tracked to logger ############# time_post_train = time.time() tot_train_time += time_post_train - time_pre_train if verbose: print 'train_time: ', time_post_train - time_pre_train print 'step, loss: ', gs, loss_train print 'labels: ', labels # END IF if save_bool: curr_logger.add_scalar_value('train/train_time', time_post_train - time_pre_train, step=gs) curr_logger.add_scalar_value('train/loss', float(np.mean(loss_train)), step=gs) curr_logger.add_scalar_value('train/epoch_acc', epoch_acc / float(batch_count), step=gs) for layer in range(len(params_array)): for p in range(len(params_array[layer])): curr_logger.add_scalar_value( 'tracked_training_variables/' + str(track_vars.keys()[layer] + '_' + str(p)), float(params_array[layer][p]), step=gs) # END FOR # 
END FOR total_params.append(params_array) curr_logger.add_scalar_value( 'tracked_training_variables/learning_rate', float(l_r), step=gs) # END IF # END WHILE ######################################################################################################################################################### if save_bool: if verbose: print "Saving..." # END IF save_checkpoint(sess, model.name, dataset, experiment_name, preproc_method, l_r, gs) coord.request_stop() coord.join(threads) # END IF if verbose: print "Tot train time: ", tot_train_time print "Tot time: ", time.time() - time_init # END WITH if save_bool: # Save tracked parameterization variables as a numpy file if len(total_params) != 0: total_params = np.array(total_params).flatten() make_dir( os.path.join('results', model.name, dataset, preproc_method, experiment_name, metrics_dir)) if os.path.isfile( os.path.join('results', model.name, dataset, preproc_method, experiment_name, metrics_dir, 'train_params_' + dataset + '.npy')): loaded_params = np.load( os.path.join('results', model.name, dataset, preproc_method, experiment_name, metrics_dir, 'train_params_' + dataset + '.npy')) total_params = np.concatenate( [loaded_params, total_params]) # END IF np.save( os.path.join('results', model.name, dataset, preproc_method, experiment_name, metrics_dir, 'train_params_' + dataset + '.npy'), total_params)
def test_read_text_lines_large(self):
  """Round-trips 49999 JSON-encoded examples through a keyed batch reader.

  Every record is a JSON example whose "sequence" feature holds one
  base64-encoded byte string. The file is read for a single epoch and the
  test verifies that all records come back, with none missing.
  """
  gfile.Glob = self._orig_glob
  sequence_prefix = "abcdefghijklmnopqrstuvwxyz123456789"
  num_records = 49999

  lines = []
  for record_index in xrange(num_records):
    lines.append((sequence_prefix + str(record_index)).encode("ascii"))

  json_head = ('{"features": { "feature": { "sequence": {' +
               '"bytes_list": { "value": ["')
  json_tail = '"]}}}}}\n'
  file_contents = "".join(
      json_head + base64.b64encode(raw).decode("ascii") + json_tail
      for raw in lines)
  filename = self._create_temp_file(file_contents)

  batch_size = 10000
  queue_capacity = 10000
  name = "my_large_batch"
  features = {"sequence": parsing_ops.FixedLenFeature([], dtypes_lib.string)}

  with ops.Graph().as_default() as g, self.test_session(graph=g) as session:
    keys, result = graph_io.read_keyed_batch_features(
        filename,
        batch_size,
        features,
        io_ops.TextLineReader,
        randomize_input=False,
        num_epochs=1,
        queue_capacity=queue_capacity,
        num_enqueue_threads=2,
        parse_fn=parsing_ops.decode_json_example,
        name=name)
    self.assertAllEqual((None,), keys.get_shape().as_list())
    self.assertEqual(1, len(result))
    self.assertAllEqual((None,), result["sequence"].get_shape().as_list())
    session.run(variables.local_variables_initializer())
    coord = coordinator.Coordinator()
    threads = queue_runner_impl.start_queue_runners(session, coord=coord)

    # Keep fetching batches until the single epoch runs out.
    data = []
    try:
      while not coord.should_stop():
        data.append(session.run(result))
    except errors.OutOfRangeError:
      pass
    finally:
      coord.request_stop()

    coord.join(threads)

  # Flatten the per-batch "sequence" arrays into one list of records.
  parsed_records = [item for batch in data for item in batch["sequence"]]
  # Check that the number of records matches expected and all records
  # are present.
  self.assertEqual(len(parsed_records), num_records)
  self.assertEqual(set(parsed_records), set(lines))
def _input_statistics_test_template(
    self, stat_object, num_features, dtype, give_full_data,
    warmup_iterations=0, rtol=1e-6, data_length=500, chunk_size=4):
  """Checks the statistics `stat_object` computes on a synthetic series.

  The series has values[0, t, f] = t + f and times[0, t] = 2 * t - 3. It is
  served through a RandomWindowInputFn with windows of `chunk_size`.

  Args:
    stat_object: Object exposing `set_data` and `initialize_graph`.
    num_features: Number of features in the synthetic series.
    dtype: Object whose `as_numpy_dtype` gives the numpy dtype for values.
    give_full_data: If true, hand the whole series to `stat_object.set_data`
      before the graph is built.
    warmup_iterations: Number of times `total_observation_count` is evaluated
      before any assertion is made.
    rtol: Relative tolerance for every comparison.
    data_length: Number of time steps in the series.
    chunk_size: Window size requested from the input function.
  """
  test_graph = ops.Graph()
  with test_graph.as_default():
    np_dtype = dtype.as_numpy_dtype
    step_column = numpy.arange(data_length, dtype=np_dtype)[..., None]
    feature_row = numpy.arange(num_features, dtype=np_dtype)[None, ...]
    values = (step_column + feature_row)[None]
    times = 2 * (numpy.arange(data_length)[None]) - 3
    if give_full_data:
      stat_object.set_data((times, values))
    features = {
        TrainEvalFeatures.TIMES: times,
        TrainEvalFeatures.VALUES: values
    }
    reader = input_pipeline.NumpyReader(features)
    input_fn = input_pipeline.RandomWindowInputFn(
        batch_size=16, window_size=chunk_size, time_series_reader=reader)
    statistics = stat_object.initialize_graph(features=input_fn()[0])

    with self.session(graph=test_graph) as session:
      variables.global_variables_initializer().run()
      coordinator = coordinator_lib.Coordinator()
      queue_runner_impl.start_queue_runners(session, coord=coordinator)

      warmup_remaining = warmup_iterations
      while warmup_remaining > 0:
        # A control dependency should ensure that, for queue-based statistics,
        # a use of any statistic is preceded by an update of all adaptive
        # statistics.
        statistics.total_observation_count.eval()
        warmup_remaining -= 1

      # Expected values are plain numpy; the evals below stay in fixed order.
      window_steps = numpy.arange(chunk_size)
      expected_start_mean = (
          range(num_features) + numpy.mean(window_steps)[None])
      expected_start_variance = numpy.tile(
          numpy.var(window_steps)[None], [num_features])
      expected_overall_mean = numpy.mean(values[0], axis=0)
      expected_overall_variance = numpy.var(values[0], axis=0)

      start_moments = statistics.series_start_moments
      self.assertAllClose(
          expected_start_mean, start_moments.mean.eval(), rtol=rtol)
      self.assertAllClose(
          expected_start_variance, start_moments.variance.eval(), rtol=rtol)

      overall_moments = statistics.overall_feature_moments
      self.assertAllClose(
          expected_overall_mean, overall_moments.mean.eval(), rtol=rtol)
      self.assertAllClose(
          expected_overall_variance, overall_moments.variance.eval(),
          rtol=rtol)

      self.assertAllClose(-3, statistics.start_time.eval(), rtol=rtol)
      self.assertAllClose(
          data_length, statistics.total_observation_count.eval(), rtol=rtol)

      coordinator.request_stop()
      coordinator.join()
with tf.variable_scope('Loss'): loss = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(labels=batched_y, logits=batched_x)) optimizer = tf.train.AdamOptimizer(0.1) train_op = optimizer.minimize(loss) tf.summary.scalar('Loss', loss) merged = tf.summary.merge_all() t1 = time.time() sess = tf.Session() checkpoint_path = os.path.join(logdir, scale + '_model') writer = tf.summary.FileWriter(logdir, sess.graph) sess.run( [tf.global_variables_initializer(), tf.local_variables_initializer()]) coord = tf.train.Coordinator() threads = queue_runner_impl.start_queue_runners(sess=sess) saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2) saver.save(sess, checkpoint_path) for i in range(num_batches * num_epochs): l, _, summary = sess.run([loss, train_op, merged]) writer.add_summary(summary, i) print 'batch ' + str(i + 1) + '/' + str( num_batches * num_epochs) + '\tLoss:' + str(l) writer.close() coord.request_stop() coord.join(threads) print 'program takes time:' + str(time.time() - t1)
def _train_on_generated_data(
    generate_fn, generative_model, train_iterations, seed,
    learning_rate=0.1, ignore_params_fn=lambda _: (),
    derived_param_test_fn=lambda _: (),
    train_input_fn_type=input_pipeline.WholeDatasetInputFn,
    train_state_manager=state_management.PassthroughStateManager()):
  """The training portion of parameter recovery tests.

  Generates data from `generative_model`, evaluates the loss obtained when the
  true parameter values are fed in, then trains a TimeSeriesRegressor on the
  generated data and evaluates its loss.

  Args:
    generate_fn: Called with `generative_model`; must return a
      `(time_series_reader, true_parameters)` pair, where `true_parameters`
      maps objects with a `.name` attribute to values.
    generative_model: Model used for generation, true-parameter evaluation and
      training.
    train_iterations: Passed as `max_steps` to training; the saving hook uses
      `train_iterations - 1` as its `every_n_iter`.
    seed: Graph-level random seed, also reported as the run config's
      `tf_random_seed`.
    learning_rate: Learning rate for the Adam optimizer.
    ignore_params_fn: Called with `generative_model`; its result is returned
      unchanged as the first element of the result tuple.
    derived_param_test_fn: Called with `generative_model`; each returned item
      is evaluated with the true parameters fed.
    train_input_fn_type: Callable building the training input function from a
      `time_series_reader` keyword argument.
    train_state_manager: State manager handed to the estimator.
      NOTE(review): the default instance is created once, when this function
      is defined, and is therefore shared by every call that omits the
      argument - confirm that sharing is intended.

  Returns:
    A tuple `(ignore_params, true_parameters, true_transformed_params,
    trained_loss, true_param_loss, saving_hook, true_parameter_eval_graph)`.
    `true_parameters` is keyed by name rather than by the original objects.
  """
  random_seed.set_random_seed(seed)
  generate_graph = ops.Graph()
  with generate_graph.as_default():
    with session.Session(graph=generate_graph):
      generative_model.initialize_graph()
      time_series_reader, true_parameters = generate_fn(generative_model)
      # Re-key by name so the values can be fed into a different graph below.
      true_parameters = {
          tensor.name: value for tensor, value in true_parameters.items()}
  eval_input_fn = input_pipeline.WholeDatasetInputFn(time_series_reader)
  eval_state_manager = state_management.PassthroughStateManager()
  true_parameter_eval_graph = ops.Graph()
  with true_parameter_eval_graph.as_default():
    generative_model.initialize_graph()
    ignore_params = ignore_params_fn(generative_model)
    feature_dict, _ = eval_input_fn()
    eval_state_manager.initialize_graph(generative_model)
    # Cast the values to the model's dtype before defining the loss.
    feature_dict[TrainEvalFeatures.VALUES] = math_ops.cast(
        feature_dict[TrainEvalFeatures.VALUES], generative_model.dtype)
    model_outputs = eval_state_manager.define_loss(
        model=generative_model,
        features=feature_dict,
        mode=estimator_lib.ModeKeys.EVAL)
    with session.Session(graph=true_parameter_eval_graph) as sess:
      variables.global_variables_initializer().run()
      coordinator = coordinator_lib.Coordinator()
      queue_runner_impl.start_queue_runners(sess, coord=coordinator)
      # Feeding the true values by name overrides the initialized variables.
      true_param_loss = model_outputs.loss.eval(feed_dict=true_parameters)
      true_transformed_params = {
          param: param.eval(feed_dict=true_parameters)
          for param in derived_param_test_fn(generative_model)}
      coordinator.request_stop()
      coordinator.join()

  saving_hook = _SavingTensorHook(
      tensors=true_parameters.keys(),
      every_n_iter=train_iterations - 1)

  class _RunConfig(estimator_lib.RunConfig):
    """Run config whose random seed is pinned to this test's `seed`."""

    @property
    def tf_random_seed(self):
      return seed

  estimator = estimators.TimeSeriesRegressor(
      model=generative_model,
      config=_RunConfig(),
      state_manager=train_state_manager,
      optimizer=adam.AdamOptimizer(learning_rate))
  train_input_fn = train_input_fn_type(time_series_reader=time_series_reader)
  # Train, then evaluate for one step and keep only the loss.
  trained_loss = (estimator.train(
      input_fn=train_input_fn,
      max_steps=train_iterations,
      hooks=[saving_hook]).evaluate(
          input_fn=eval_input_fn, steps=1))["loss"]
  logging.info("Final trained loss: %f", trained_loss)
  logging.info("True parameter loss: %f", true_param_loss)
  return (ignore_params, true_parameters, true_transformed_params,
          trained_loss, true_param_loss, saving_hook,
          true_parameter_eval_graph)
def _testBasics(self, num_unroll, length, pad, expected_seq1_batch1,
                expected_seq2_batch1, expected_seq1_batch2,
                expected_seq2_batch2, expected_seq3_batch1,
                expected_seq3_batch2, expected_seq4_batch1,
                expected_seq4_batch2):
    """Runs two steps of `batch_sequences_with_states` and checks the output.

    After each step the fetched keys, context, sequences, states and lengths
    are compared with the expectations. seq1/seq2 are compared directly;
    seq3/seq4 are compared through their `indices`, `values` and
    `dense_shape`. The states fetched in the second step must equal the
    initial states shifted by the updates saved in the first step (+1 for
    "state1", -1 for "state2").

    Args:
      num_unroll: Passed as `num_unroll`; also the per-example length expected
        in the first step.
      length: Passed as `input_length`.
      pad: Passed as `pad`.
      expected_seq1_batch1: Expected "seq1" value in the first step.
      expected_seq2_batch1: Expected "seq2" value in the first step.
      expected_seq1_batch2: Expected "seq1" value in the second step.
      expected_seq2_batch2: Expected "seq2" value in the second step.
      expected_seq3_batch1: Expected sparse "seq3" value in the first step.
      expected_seq3_batch2: Expected sparse "seq3" value in the second step.
      expected_seq4_batch1: Expected sparse "seq4" value in the first step.
      expected_seq4_batch2: Expected sparse "seq4" value in the second step.
    """

    def check_sparse(expected, actual):
        """Compares two sparse values component by component."""
        self.assertAllEqual(expected.indices, actual.indices)
        self.assertAllEqual(expected.values, actual.values)
        self.assertAllEqual(expected.dense_shape, actual.dense_shape)

    with self.test_session() as sess:
        next_batch = sqss.batch_sequences_with_states(
            input_key=self.key,
            input_sequences=self.sequences,
            input_context=self.context,
            input_length=length,
            initial_states=self.initial_states,
            num_unroll=num_unroll,
            batch_size=self.batch_size,
            num_threads=3,
            # to enforce that we only move on to the next examples after
            # finishing all segments of the first ones.
            capacity=2,
            pad=pad)

        state1 = next_batch.state("state1")
        state2 = next_batch.state("state2")
        state1_update = next_batch.save_state("state1", state1 + 1)
        state2_update = next_batch.save_state("state2", state2 - 1)

        # Make sure queue runner with SQSS is added properly to meta graph
        # def. Saver requires at least one variable.
        v0 = variables.Variable(10.0, name="v0")
        ops.add_to_collection("variable_collection", v0)
        # The initializer op is only created here; it is never run.
        variables.global_variables_initializer()
        save = saver.Saver([v0])
        metafile = os.path.join(test.get_temp_dir(), "sqss_test", "metafile")
        meta_graph_def = save.export_meta_graph(metafile)
        qr_saved = meta_graph_def.collection_def[ops.GraphKeys.QUEUE_RUNNERS]
        # NOTE(review): this presumably cannot be None for a proto field, so
        # the check may only prove the export succeeded - confirm.
        self.assertIsNotNone(qr_saved.bytes_list.value)

        coord = coordinator.Coordinator()
        threads = queue_runner_impl.start_queue_runners(coord=coord)

        def run_step():
            """Fetches one batch, saves the states, drops the save results."""
            fetched = sess.run(
                (next_batch.key, next_batch.next_key,
                 next_batch.sequences["seq1"], next_batch.sequences["seq2"],
                 next_batch.sequences["seq3"], next_batch.sequences["seq4"],
                 next_batch.context["context1"], state1, state2,
                 next_batch.length, state1_update, state2_update))
            return fetched[:10]

        first_keys = {b"00000_of_00002"}
        second_keys = {b"00001_of_00002"}
        final_keys = {b"STOP"}

        # Step 1
        (key_value, next_key_value, seq1_value, seq2_value, seq3_value,
         seq4_value, context1_value, state1_value, state2_value,
         length_value) = run_step()

        self.assertEqual(first_keys, self._prefix(key_value))
        self.assertEqual(second_keys, self._prefix(next_key_value))
        self.assertAllEqual(
            np.tile(self.context["context1"], (self.batch_size, 1)),
            context1_value)
        self.assertAllEqual(expected_seq1_batch1, seq1_value)
        self.assertAllEqual(expected_seq2_batch1, seq2_value)
        check_sparse(expected_seq3_batch1, seq3_value)
        check_sparse(expected_seq4_batch1, seq4_value)
        self.assertAllEqual(
            np.tile(self.initial_states["state1"], (self.batch_size, 1, 1)),
            state1_value)
        self.assertAllEqual(
            np.tile(self.initial_states["state2"], (self.batch_size, 1)),
            state2_value)
        self.assertAllEqual(length_value, [num_unroll, num_unroll])

        # Step 2
        (key_value, next_key_value, seq1_value, seq2_value, seq3_value,
         seq4_value, context1_value, state1_value, state2_value,
         length_value) = run_step()

        self.assertEqual(second_keys, self._prefix(key_value))
        self.assertEqual(final_keys, self._prefix(next_key_value))
        self.assertAllEqual(
            np.tile(self.context["context1"], (self.batch_size, 1)),
            context1_value)
        self.assertAllEqual(expected_seq1_batch2, seq1_value)
        self.assertAllEqual(expected_seq2_batch2, seq2_value)
        check_sparse(expected_seq3_batch2, seq3_value)
        check_sparse(expected_seq4_batch2, seq4_value)
        # The states now carry the updates saved during step 1.
        self.assertAllEqual(
            1 + np.tile(self.initial_states["state1"],
                        (self.batch_size, 1, 1)),
            state1_value)
        self.assertAllEqual(
            -1 + np.tile(self.initial_states["state2"],
                         (self.batch_size, 1)),
            state2_value)
        self.assertAllEqual([1, 1], length_value)

        coord.request_stop()
        coord.join(threads, stop_grace_period_secs=2)
def test(model, input_dims, output_dims, seq_length, size, dataset,
         loaded_dataset, experiment_name, num_vids, split, base_data_path,
         f_name, load_model, return_layer, clip_length, video_offset,
         clip_offset, num_clips, clip_stride, metrics_method, batch_size,
         metrics_dir, loaded_checkpoint, verbose, gpu_list, preproc_method,
         loaded_preproc, random_init, avg_clips, use_softmax,
         preproc_debugging, reverse, topk):
    """
    Function used to test the performance and analyse a chosen model
    Args:
        :model:              tf-activity-recognition framework model object
        :input_dims:         Number of frames used in input
        :output_dims:        Integer number of classes in current dataset
        :seq_length:         Length of output sequence expected from LSTM
        :size:               List detailing height and width of frame
        :dataset:            Name of dataset being loaded
        :loaded_dataset:     Name of dataset which was used to train the current model
        :experiment_name:    Name of current experiment
        :num_vids:           Number of videos to be used for testing
        :split:              Split of dataset being used
        :base_data_path:     Full path to root directory containing datasets
        :f_name:             Specific video directory within a chosen split of a dataset
        :load_model:         Boolean variable indicating whether to load from a checkpoint or not
        :return_layer:       Layer to return from the model, used to extract features
        :clip_length:        Length of clips to cut video into, -1 indicates using the entire video as one clip
        :video_offset:       String indicating where to begin selecting video clips (provided clip_offset is None)
        :clip_offset:        "none" or "random" indicating where to begin selecting video clips
        :num_clips:          Number of clips to break video into
        :clip_stride:        Number of frames that overlap between clips, 0 indicates no overlap and negative values indicate a gap of frames between clips
        :metrics_method:     Which method to use to calculate accuracy metrics. ("default" or "svm")
        :batch_size:         Number of clips to load into the model each step.
        :metrics_dir:        Name of subdirectory within the experiment to store metrics. Unique directory names allow for parallel testing
        :loaded_checkpoint:  Specify the exact checkpoint of saved model to be loaded for further training/testing
        :verbose:            Boolean to indicate if all print statements should be processed or not
        :gpu_list:           List of GPU IDs to be used (at most one is accepted)
        :preproc_method:     The preprocessing method to use, default, cvr, rr, sr, or any other custom preprocessing
        :loaded_preproc:     Name of preproc method which was used to train the current model
        :random_init:        Randomly initialize model weights, not loading from any files (default False)
        :avg_clips:          Binary boolean indicating whether to average predictions across clips
        :use_softmax:        Binary boolean indicating whether to apply softmax to the inference of the model
        :preproc_debugging:  Boolean indicating whether to load videos and clips in a queue or to load them directly for debugging (Default 0)
        :reverse:            Forwarded unchanged to load_dataset as its `reverse` argument
        :topk:               Forwarded unchanged to Metrics as its `topk` argument

    Returns:
        Does not return anything
    """

    # NOTE(review): `save_bool` is neither a parameter nor a local of this
    # function; it is presumably a module-level global - confirm it is always
    # defined before this function is called.

    with tf.name_scope("my_scope") as scope:

        # Initializers for checkpoint and global step variable
        ckpt = None
        gs_init = 0

        ################################### Checkpoint loading block #######################################################

        # Load pre-trained/saved model
        if load_model:
            try:
                # The third returned value is not needed for testing.
                ckpt, gs_init, _ = load_checkpoint(
                    model.name, loaded_dataset, experiment_name,
                    loaded_checkpoint, loaded_preproc)
                if verbose:
                    print('A better checkpoint is found. The global_step value is: ' + str(gs_init))

            except Exception:
                # `Exception` rather than a bare except, so KeyboardInterrupt
                # and SystemExit are not intercepted here.
                if verbose:
                    print("Failed loading checkpoint requested. Please check.")
                exit()

            # END TRY
        else:
            ckpt = model.load_default_weights()

        # END IF

        ######################################################################################################################

        # Initialize model variables
        istraining = False
        global_step = tf.Variable(gs_init, name='global_step', trainable=False)
        number_of_videos = tf.Variable(num_vids, name='number_of_videos', trainable=False)
        video_step = tf.Variable(1.0, name='video_step', trainable=False)

        # TF session setup
        config = tf.ConfigProto(allow_soft_placement=True)
        sess = tf.Session(config=config)
        init = tf.global_variables_initializer()

        # Variables get randomly initialized into tf graph
        sess.run(init)

        data_path = os.path.join(base_data_path, 'tfrecords_' + dataset,
                                 'Split' + str(split), f_name)

        # Setting up tensors for models
        # input_data_tensor - [batchSize, inputDims, height, width, channels]
        input_data_tensor, labels_tensor, names_tensor = load_dataset(
            model, 1, batch_size, output_dims, input_dims, seq_length, size,
            data_path, dataset, istraining, clip_length, video_offset,
            clip_offset, num_clips, clip_stride, video_step,
            preproc_debugging, 0, verbose, reverse=reverse)

        ######### GPU list check block ####################

        assert (len(gpu_list) <= 1)

        if len(gpu_list) == 0:
            gpu_list = ['0']  # Default choice is ID = 0

        # END IF

        ###################################################

        ################################################## Setup TF graph block ######################################################

        # Model Inference
        with tf.device('/gpu:' + gpu_list[0]):
            logits = model.inference(
                input_data_tensor[0:batch_size, :, :, :, :], istraining,
                input_dims, output_dims, seq_length, scope,
                return_layer=return_layer)[0]

            # Logits shape: [batchSize, seqLength, outputDims] if not, reshape
            logits_shape = logits.get_shape().as_list()
            if (logits_shape[0] != batch_size or
                    logits_shape[1] != seq_length or
                    logits_shape[2] != output_dims
                    ) and return_layer[0] == 'logits':
                logits = tf.reshape(logits,
                                    [batch_size, seq_length, output_dims])

            # END IF

            if use_softmax:
                logits = tf.nn.softmax(logits)

            # END IF

        # END WITH

        ############################################################################################################################################

        if save_bool:
            ######################### Logger Setup block ######################################

            # Logger setup (Name format: Date, month, hour, minute and second, with a prefix of exp_test)
            log_name = ("exp_test_%s_%s_%s_%s_%s" %
                        (time.strftime("%d_%m_%H_%M_%S"), dataset,
                         preproc_method, experiment_name, metrics_method))

            curr_logger = Logger(
                os.path.join('logs', model.name, dataset, preproc_method,
                             metrics_dir, log_name))

            # Create the results directory one level at a time, in the same
            # order as before: results/<model>/<dataset>/<preproc>/
            # <experiment>/<metrics_dir>.
            results_dir = 'results'
            for component in (model.name, dataset, preproc_method,
                              experiment_name, metrics_dir):
                results_dir = os.path.join(results_dir, component)
                make_dir(results_dir)

            ###################################################################################

        # Initializer for global and local variables, run once the graph and
        # the queue runners are in place
        init = (tf.global_variables_initializer(),
                tf.local_variables_initializer())
        coord = tf.train.Coordinator()
        threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)

        if save_bool:
            metrics = Metrics(output_dims, seq_length, curr_logger,
                              metrics_method, istraining, model.name,
                              experiment_name, preproc_method, dataset,
                              metrics_dir, verbose=verbose, topk=topk)

        # Variables get randomly initialized into tf graph
        sess.run(init)

        # Check that weights were loaded or random initializations are requested
        if ckpt is None or random_init:
            print("Caution: Model weights are not being loaded, using random initialization.")

        else:
            # Model variables initialized from previous saved models
            initialize_from_dict(sess, ckpt, model.name)

        # END IF

        del ckpt

        count = 0
        videos_loaded = 0
        previous_vid_name = ''

        if verbose:
            print("Begin Testing")

        # END IF

        ########################################## Testing loop block ################################################################

        while videos_loaded <= num_vids:
            output_predictions, labels, names = sess.run(
                [logits, labels_tensor, names_tensor])

            if avg_clips:
                # Collapse the whole batch into a single averaged prediction
                output_predictions = np.array([np.mean(output_predictions, 0)])
                names = names[:1]

            for batch_idx, vid_name in enumerate(names):
                if vid_name != previous_vid_name:
                    previous_vid_name = vid_name
                    videos_loaded += 1
                    if verbose:
                        # "%s %s" reproduces the single space the Python 2
                        # print statement put between its two arguments.
                        print("%s %s" % ("Number of videos loaded: ", videos_loaded))

                # Extract remaining clips from currently loaded video, once it finishes exit while loop
                if videos_loaded > num_vids:
                    break

                count += 1

                if save_bool:
                    metrics.log_prediction(labels[batch_idx][0],
                                           output_predictions[batch_idx],
                                           vid_name, count)

                # END IF

            # END FOR

        # END WHILE

        #########################################################################################################################################################

    # END WITH

    coord.request_stop()
    coord.join(threads)

    # Nothing below uses the session; release its resources explicitly.
    sess.close()

    if save_bool:
        total_accuracy = metrics.total_classification()
        total_pred = metrics.get_predictions_array()

        if verbose:
            print("%s %s" % ("Total accuracy : ", total_accuracy))
            print(total_pred)

        # Save results in numpy format
        np.save(
            os.path.join(
                results_dir,
                'test_predictions_' + dataset + "_" + metrics_method + '.npy'),
            np.array(total_pred))
def example_video(dataset, num_vids, split, base_data_path, f_name, vid_name,
                  verbose):
    """
    Function used to load clips of a dataset without preprocessing and stop in
    the debugger after every fetched batch
    Args:
        :dataset:            Name of dataset being loaded
        :num_vids:           Number of videos to load; the loop ends once more than this many distinct video names have been seen
        :split:              Split of dataset being used
        :base_data_path:     Full path to root directory containing datasets
        :f_name:             Specific video directory within a chosen split of a dataset
        :vid_name:           Name of video to load if desired
        :verbose:            Boolean to indicate if all print statements should be processed or not

    Returns:
        Does not return anything
    """

    with tf.name_scope("my_scope"):

        # Initialize model variables
        istraining = False

        data_path = os.path.join(base_data_path, 'tfrecords_' + dataset,
                                 'Split' + str(split), f_name)

        # Setting up tensors for models
        input_data_tensor, labels_tensor, names_tensor = \
            load_dataset_without_preprocessing(data_path, dataset, istraining,
                                               vid_name, verbose)

        # TF session setup
        config = tf.ConfigProto(allow_soft_placement=True)
        sess = tf.Session(config=config)
        init = (tf.global_variables_initializer(),
                tf.local_variables_initializer())
        coord = tf.train.Coordinator()
        threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)

        # Variables get randomly initialized into tf graph
        sess.run(init)

        count = 0
        videos_loaded = 0
        previous_vid_name = ''

        if verbose:
            print("Begin Testing")

        # END IF

        ########################################## Testing loop block ################################################################

        while videos_loaded <= num_vids:
            output, labels, names = sess.run(
                [input_data_tensor, labels_tensor, names_tensor])

            # NOTE(review): this breakpoint looks deliberate - `output` and
            # `labels` are not used anywhere else, so inspecting them here
            # appears to be the purpose of the function. Confirm before
            # removing it.
            import pdb
            pdb.set_trace()

            # A separate loop variable keeps the `vid_name` argument intact.
            for current_vid_name in names:
                if current_vid_name != previous_vid_name:
                    previous_vid_name = current_vid_name
                    videos_loaded += 1
                    if verbose:
                        # "%s %s" reproduces the single space the Python 2
                        # print statement put between its two arguments.
                        print("%s %s" % ("Number of videos loaded: ", videos_loaded))

                # Extract remaining clips from currently loaded video, once it finishes exit while loop
                if videos_loaded > num_vids:
                    break

                count += 1

            # END FOR

        # END WHILE

        #########################################################################################################################################################

    # END WITH

    coord.request_stop()
    coord.join(threads)

    # The session is not used after this point; release its resources.
    sess.close()
def test_keyed_features_filter(self):
    """Checks that `filter_fn` drops examples before they are batched.

    Six JSON examples are written to a temporary file, one per line. The
    filter keeps the examples whose "age" is below 2, i.e. the lines at
    indices 1, 2 and 3. The test expects a first batch with two of them, a
    second batch with the remaining one, and then `OutOfRangeError`.
    """
    gfile.Glob = self._orig_glob
    lines = [
        '{"features": {"feature": {"age": {"int64_list": {"value": [2]}}}}}',
        '{"features": {"feature": {"age": {"int64_list": {"value": [0]}}}}}',
        '{"features": {"feature": {"age": {"int64_list": {"value": [1]}}}}}',
        '{"features": {"feature": {"age": {"int64_list": {"value": [0]}}}}}',
        '{"features": {"feature": {"age": {"int64_list": {"value": [3]}}}}}',
        '{"features": {"feature": {"age": {"int64_list": {"value": [5]}}}}}'
    ]
    filename = self._create_temp_file("\n".join(lines))
    encoded_filename = filename.encode("utf-8")

    batch_size = 2
    queue_capacity = 4
    name = "my_batch"
    features = {"age": parsing_ops.FixedLenFeature([], dtypes_lib.int64)}

    def filter_fn(keys, examples_json):
        """Returns a mask selecting the examples whose age is below 2."""
        del keys  # Only the example contents decide the outcome.
        serialized = parsing_ops.decode_json_example(examples_json)
        parsed = parsing_ops.parse_example(serialized, features)
        return math_ops.less(parsed["age"], 2)

    graph = ops.Graph()
    with graph.as_default() as g, self.test_session(graph=g) as session:
        keys, inputs = graph_io._read_keyed_batch_examples_helper(
            filename,
            batch_size,
            reader=io_ops.TextLineReader,
            randomize_input=False,
            num_epochs=1,
            read_batch_size=batch_size,
            queue_capacity=queue_capacity,
            filter_fn=filter_fn,
            name=name)
        for tensor in (keys, inputs):
            self.assertAllEqual((None, ), tensor.get_shape().as_list())
        session.run(variables.local_variables_initializer())

        coord = coordinator.Coordinator()
        threads = queue_runner_impl.start_queue_runners(session, coord=coord)

        def next_batch():
            """Fetches the next batch of (keys, values)."""
            return session.run((keys, inputs))

        # First batch of two filtered examples.
        out_keys, out_vals = next_batch()
        self.assertAllEqual(
            [encoded_filename + b":2", encoded_filename + b":3"], out_keys)
        self.assertAllEqual(
            [lines[1].encode("utf-8"), lines[2].encode("utf-8")], out_vals)

        # Second batch will only have one filtered example as that's the only
        # remaining example that satisfies the filtering criterion.
        out_keys, out_vals = next_batch()
        self.assertAllEqual([encoded_filename + b":4"], out_keys)
        self.assertAllEqual([lines[3].encode("utf-8")], out_vals)

        # Exhausted input.
        with self.assertRaises(errors.OutOfRangeError):
            next_batch()

        coord.request_stop()
        coord.join(threads)