Example #1
  def testFeederActsLikeQueue(self):
    # Tests that a feeder acts like a queue
    feeder = feeder_lib.Feeder(
        dtypes=[dtypes_lib.string, dtypes_lib.string],
        shapes=[[], []],
        capacity=10)

    feeder.set_many_fed_tensors([
        constant_op.constant(['a0', 'a1', 'a2']),
        constant_op.constant(['b0', 'b1', 'b2'])
    ])

    out_a, out_b = feeder.get_fed_tensors()

    with self.test_session() as session:
      coord = coordinator.Coordinator()
      queue_runner_impl.start_queue_runners(session, coord=coord)

      a, b = session.run([out_a, out_b])
      self.assertEqual(b'a0', a)
      self.assertEqual(b'b0', b)
      a = session.run(out_a)  # Omit b!
      self.assertEqual(b'a1', a)
      a, b = session.run([out_a, out_b])
      self.assertEqual(b'a2', a)
      self.assertEqual(b'b2', b)  # queued together
      a, b = session.run([out_a, out_b])  # loops around
      self.assertEqual(b'a0', a)
      self.assertEqual(b'b0', b)  # queued together

      coord.request_stop()
      coord.join()
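
All of these snippets share one lifecycle: build queue-backed input ops, create a Coordinator, call start_queue_runners to launch the enqueue threads, consume tensors with session.run, and finally stop and join the threads. Below is a minimal self-contained sketch of that pattern, assuming TensorFlow 1.x graph mode; the toy queue and enqueue op are illustrative, not taken from any example on this page.

import tensorflow as tf

# Register a runner that keeps a toy queue fed from background threads.
queue = tf.FIFOQueue(capacity=10, dtypes=tf.float32)
enqueue_op = queue.enqueue(tf.constant(1.0))
tf.train.add_queue_runner(tf.train.QueueRunner(queue, [enqueue_op]))

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    # One thread is started per registered queue runner.
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        value = sess.run(queue.dequeue())  # produced by the runner threads
    finally:
        # Always stop and join, even if the run raises.
        coord.request_stop()
        coord.join(threads)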
Example #2
 def test_long_eval_discard_indivisible(self):
   g = ops.Graph()
   with g.as_default():
     model = ARModel(periodicities=2,
                     num_features=1,
                     num_time_buckets=10,
                     input_window_size=2,
                     output_window_size=2)
     raw_features = {
         TrainEvalFeatures.TIMES: [[1, 3, 5, 7, 11]],
         TrainEvalFeatures.VALUES: [[[1.], [2.], [3.], [4.], [5.]]]}
     model.initialize_graph()
     raw_evaluation = model.define_loss(
         raw_features, mode=estimator_lib.ModeKeys.EVAL)
     with session.Session() as sess:
       coordinator = coordinator_lib.Coordinator()
       queue_runner_impl.start_queue_runners(sess, coord=coordinator)
       variables.global_variables_initializer().run()
       raw_evaluation_evaled = sess.run(raw_evaluation)
       self.assertAllEqual([[7, 11]],
                           raw_evaluation_evaled.prediction_times)
       for feature_name in raw_evaluation.predictions:
         self.assertAllEqual(
             [1, 2, 1],  # batch, window, num_features. The window has two cut
                         # off for the first input window and one discarded so
                         # that the remainder is divisible into output windows.
             raw_evaluation_evaled.predictions[feature_name].shape)
       coordinator.request_stop()
       coordinator.join()
Example #3
  def _test_pass_to_next(self, read_offset, step, correct_offset):
    stub_model = StubTimeSeriesModel(correct_offset=correct_offset)
    data = self._make_test_data(
        length=100 + read_offset, cut_start=None, cut_end=None, offset=100.,
        step=step)
    init_input_fn = input_pipeline.WholeDatasetInputFn(
        input_pipeline.NumpyReader(
            {k: v[:-read_offset] for k, v in data.items()}))
    result_input_fn = input_pipeline.WholeDatasetInputFn(
        input_pipeline.NumpyReader(
            {k: v[read_offset:] for k, v in data.items()}))

    chainer = state_management.ChainingStateManager(
        state_saving_interval=1)
    stub_model.initialize_graph()
    chainer.initialize_graph(model=stub_model)
    init_model_outputs = chainer.define_loss(
        model=stub_model, features=init_input_fn()[0],
        mode=estimator_lib.ModeKeys.TRAIN)
    result_model_outputs = chainer.define_loss(
        model=stub_model, features=result_input_fn()[0],
        mode=estimator_lib.ModeKeys.TRAIN)
    with self.test_session() as session:
      variables.global_variables_initializer().run()
      coordinator = coordinator_lib.Coordinator()
      queue_runner_impl.start_queue_runners(session, coord=coordinator)
      init_model_outputs.loss.eval()
      returned_loss = result_model_outputs.loss.eval()
      coordinator.request_stop()
      coordinator.join()
      return returned_loss
Example #4
 def _random_window_input_fn_test_template(
     self, time_series_reader, window_size, batch_size, num_features,
     discard_out_of_order=False):
   input_fn = input_pipeline.RandomWindowInputFn(
       time_series_reader=time_series_reader,
       window_size=window_size, batch_size=batch_size)
   result, _ = input_fn()
   init_op = variables.local_variables_initializer()
   with self.cached_session() as session:
     coordinator = coordinator_lib.Coordinator()
     queue_runner_impl.start_queue_runners(session, coord=coordinator)
     session.run(init_op)
     features = session.run(result)
     coordinator.request_stop()
     coordinator.join()
   self.assertAllEqual([batch_size, window_size],
                       features[TrainEvalFeatures.TIMES].shape)
   for window_position in range(window_size - 1):
     for batch_position in range(batch_size):
       # Checks that all times are contiguous
       self.assertEqual(
           features[TrainEvalFeatures.TIMES][batch_position,
                                             window_position + 1],
           features[TrainEvalFeatures.TIMES][batch_position,
                                             window_position] + 1)
   self.assertAllEqual([batch_size, window_size, num_features],
                       features[TrainEvalFeatures.VALUES].shape)
   self.assertEqual("int64", features[TrainEvalFeatures.TIMES].dtype)
   for feature_number in range(num_features):
     self.assertAllEqual(
         features[TrainEvalFeatures.TIMES] * 2. + feature_number,
         features[TrainEvalFeatures.VALUES][:, :, feature_number])
   return features
Example #5
 def _test_initialization(self, warmup_iterations, batch_size):
   stub_model = StubTimeSeriesModel()
   data = self._make_test_data(length=20, cut_start=None, cut_end=None,
                               offset=0.)
   if batch_size == -1:
     input_fn = test_utils.AllWindowInputFn(
         input_pipeline.NumpyReader(data), window_size=10)
   else:
     input_fn = input_pipeline.RandomWindowInputFn(
         input_pipeline.NumpyReader(data),
         window_size=10,
         batch_size=batch_size)
   chainer = state_management.ChainingStateManager(
       state_saving_interval=1)
   features, _ = input_fn()
   stub_model.initialize_graph()
   chainer.initialize_graph(model=stub_model)
   model_outputs = chainer.define_loss(
       model=stub_model, features=features, mode=estimator_lib.ModeKeys.TRAIN)
   with self.test_session() as session:
     variables.global_variables_initializer().run()
     coordinator = coordinator_lib.Coordinator()
     queue_runner_impl.start_queue_runners(session, coord=coordinator)
     for _ in range(warmup_iterations):
       # Warm up saved state
       model_outputs.loss.eval()
     outputs = model_outputs.loss.eval()
     coordinator.request_stop()
     coordinator.join()
     return outputs
Example #6
 def _gap_test_template(self, times, values):
   random_model = RandomStateSpaceModel(
       state_dimension=1, state_noise_dimension=1,
       configuration=state_space_model.StateSpaceModelConfiguration(
           num_features=1))
   random_model.initialize_graph()
   input_fn = input_pipeline.WholeDatasetInputFn(
       input_pipeline.NumpyReader({
           feature_keys.TrainEvalFeatures.TIMES: times,
           feature_keys.TrainEvalFeatures.VALUES: values
       }))
   features, _ = input_fn()
   times = features[feature_keys.TrainEvalFeatures.TIMES]
   values = features[feature_keys.TrainEvalFeatures.VALUES]
   model_outputs = random_model.get_batch_loss(
       features={
           feature_keys.TrainEvalFeatures.TIMES: times,
           feature_keys.TrainEvalFeatures.VALUES: values
       },
       mode=None,
       state=math_utils.replicate_state(
           start_state=random_model.get_start_state(),
           batch_size=array_ops.shape(times)[0]))
   with self.cached_session() as session:
     variables.global_variables_initializer().run()
     coordinator = coordinator_lib.Coordinator()
     queue_runner_impl.start_queue_runners(session, coord=coordinator)
     model_outputs.loss.eval()
     coordinator.request_stop()
     coordinator.join()
Example #7
 def _all_window_input_fn_test_template(
     self, time_series_reader, num_samples, window_size,
     original_numpy_features=None):
   input_fn = test_utils.AllWindowInputFn(
       time_series_reader=time_series_reader,
       window_size=window_size)
   features, _ = input_fn()
   init_op = variables.local_variables_initializer()
   with self.cached_session() as session:
     coordinator = coordinator_lib.Coordinator()
     queue_runner_impl.start_queue_runners(session, coord=coordinator)
     session.run(init_op)
     chunked_times, chunked_values = session.run(
         [features[TrainEvalFeatures.TIMES],
          features[TrainEvalFeatures.VALUES]])
     coordinator.request_stop()
     coordinator.join()
   self.assertAllEqual([num_samples - window_size + 1, window_size],
                       chunked_times.shape)
   if original_numpy_features is not None:
     original_times = original_numpy_features[TrainEvalFeatures.TIMES]
     original_values = original_numpy_features[TrainEvalFeatures.VALUES]
     self.assertAllEqual(original_times, numpy.unique(chunked_times))
     self.assertAllEqual(original_values[chunked_times],
                         chunked_values)
Example #8
 def _equivalent_to_single_model_test_template(self, model_generator):
   with self.cached_session() as session:
     random_model = RandomStateSpaceModel(
         state_dimension=5,
         state_noise_dimension=4,
         configuration=state_space_model.StateSpaceModelConfiguration(
             dtype=dtypes.float64, num_features=1))
     random_model.initialize_graph()
     series_length = 10
     model_data = random_model.generate(
         number_of_series=1, series_length=series_length,
         model_parameters=random_model.random_model_parameters())
     input_fn = input_pipeline.WholeDatasetInputFn(
         input_pipeline.NumpyReader(model_data))
     features, _ = input_fn()
     model_outputs = random_model.get_batch_loss(
         features=features,
         mode=None,
         state=math_utils.replicate_state(
             start_state=random_model.get_start_state(),
             batch_size=array_ops.shape(
                 features[feature_keys.TrainEvalFeatures.TIMES])[0]))
     variables.global_variables_initializer().run()
     compare_outputs_evaled_fn = model_generator(
         random_model, model_data)
     coordinator = coordinator_lib.Coordinator()
     queue_runner_impl.start_queue_runners(session, coord=coordinator)
     compare_outputs_evaled = compare_outputs_evaled_fn(session)
     model_outputs_evaled = session.run(
         (model_outputs.end_state, model_outputs.predictions))
     coordinator.request_stop()
     coordinator.join()
     model_posteriors, model_predictions = model_outputs_evaled
     (_, compare_posteriors,
      compare_predictions) = compare_outputs_evaled
     (model_posterior_mean, model_posterior_var,
      model_from_time) = model_posteriors
     (compare_posterior_mean, compare_posterior_var,
      compare_from_time) = compare_posteriors
     self.assertAllClose(model_posterior_mean, compare_posterior_mean[0])
     self.assertAllClose(model_posterior_var, compare_posterior_var[0])
     self.assertAllClose(model_from_time, compare_from_time)
     self.assertEqual(sorted(model_predictions.keys()),
                      sorted(compare_predictions.keys()))
     for prediction_name in model_predictions:
       if prediction_name == "loss":
         # Chunking means that losses will be different; skip testing them.
         continue
       # Compare the last chunk to their corresponding un-chunked model
       # predictions
       last_prediction_chunk = compare_predictions[prediction_name][-1]
       comparison_values = last_prediction_chunk.shape[0]
       model_prediction = (
           model_predictions[prediction_name][0, -comparison_values:])
       self.assertAllClose(model_prediction,
                           last_prediction_chunk)
Example #9
 def _input_statistics_test_template(
     self, stat_object, num_features, dtype, give_full_data,
     warmup_iterations=0, rtol=1e-6, data_length=500, chunk_size=4):
   graph = ops.Graph()
   with graph.as_default():
     numpy_dtype = dtype.as_numpy_dtype
     values = (
         (numpy.arange(data_length, dtype=numpy_dtype)[..., None]
          + numpy.arange(num_features, dtype=numpy_dtype)[None, ...])[None])
     times = 2 * (numpy.arange(data_length)[None]) - 3
     if give_full_data:
       stat_object.set_data((times, values))
     features = {TrainEvalFeatures.TIMES: times,
                 TrainEvalFeatures.VALUES: values}
     input_fn = input_pipeline.RandomWindowInputFn(
         batch_size=16, window_size=chunk_size,
         time_series_reader=input_pipeline.NumpyReader(features))
     statistics = stat_object.initialize_graph(
         features=input_fn()[0])
     with self.session(graph=graph) as session:
       variables.global_variables_initializer().run()
       coordinator = coordinator_lib.Coordinator()
       queue_runner_impl.start_queue_runners(session, coord=coordinator)
       for _ in range(warmup_iterations):
         # A control dependency should ensure that, for queue-based statistics,
         # a use of any statistic is preceded by an update of all adaptive
         # statistics.
         statistics.total_observation_count.eval()
        self.assertAllClose(
            numpy.arange(num_features) + numpy.mean(numpy.arange(chunk_size)),
            statistics.series_start_moments.mean.eval(),
            rtol=rtol)
       self.assertAllClose(
           numpy.tile(numpy.var(numpy.arange(chunk_size))[None],
                      [num_features]),
           statistics.series_start_moments.variance.eval(),
           rtol=rtol)
       self.assertAllClose(
           numpy.mean(values[0], axis=0),
           statistics.overall_feature_moments.mean.eval(),
           rtol=rtol)
       self.assertAllClose(
           numpy.var(values[0], axis=0),
           statistics.overall_feature_moments.variance.eval(),
           rtol=rtol)
       self.assertAllClose(
           -3,
           statistics.start_time.eval(),
           rtol=rtol)
       self.assertAllClose(
           data_length,
           statistics.total_observation_count.eval(),
           rtol=rtol)
       coordinator.request_stop()
       coordinator.join()
Example #10
 def testStartQueueRunnersRaisesIfNotASession(self):
   zero64 = constant_op.constant(0, dtype=dtypes.int64)
   var = variables.VariableV1(zero64)
   count_up_to = var.count_up_to(3)
   queue = data_flow_ops.FIFOQueue(10, dtypes.float32)
   init_op = variables.global_variables_initializer()
   qr = queue_runner_impl.QueueRunner(queue, [count_up_to])
   queue_runner_impl.add_queue_runner(qr)
   with self.cached_session():
     init_op.run()
     with self.assertRaisesRegexp(TypeError, "tf.Session"):
       queue_runner_impl.start_queue_runners("NotASession")
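
The same type check can be reproduced outside the test harness. A minimal sketch, assuming TensorFlow 1.x; the string argument is deliberately not a session:

import tensorflow as tf

try:
    tf.train.start_queue_runners(sess="NotASession")
except TypeError as e:
    print(e)  # the message names tf.Session, which is what the test's regexp matches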
Example #11
  def testExtendAfterQueueRunners(self):
    server = self._cached_server
    with session.Session(server.target) as sess:
      input_queue = input_ops.input_producer(constant_op.constant(
          [0.], dtype=dtypes.float32))
      self.assertIsNotNone(input_queue)

      var = variables.VariableV1(1., dtype=dtypes.float32, trainable=False,
                                 name="var")

      sess.run(variables.global_variables_initializer())
      queue_runner_impl.start_queue_runners(sess)
      sess.run(var.assign(3.0))
Example #12
 def test_long_eval(self):
   g = ops.Graph()
   with g.as_default():
     model = ARModel(periodicities=2,
                     num_features=1,
                     num_time_buckets=10,
                     input_window_size=2,
                     output_window_size=1)
     raw_features = {
         TrainEvalFeatures.TIMES: [[1, 3, 5, 7, 11]],
         TrainEvalFeatures.VALUES: [[[1.], [2.], [3.], [4.], [5.]]]}
     chunked_features, _ = test_utils.AllWindowInputFn(
         time_series_reader=input_pipeline.NumpyReader(raw_features),
         window_size=3)()
     model.initialize_graph()
     with variable_scope.variable_scope("armodel") as scope:
       raw_evaluation = model.define_loss(
           raw_features, mode=estimator_lib.ModeKeys.EVAL)
     with variable_scope.variable_scope(scope, reuse=True):
       chunked_evaluation = model.define_loss(
           chunked_features, mode=estimator_lib.ModeKeys.EVAL)
     with session.Session() as sess:
       coordinator = coordinator_lib.Coordinator()
       queue_runner_impl.start_queue_runners(sess, coord=coordinator)
       variables.global_variables_initializer().run()
       raw_evaluation_evaled, chunked_evaluation_evaled = sess.run(
           [raw_evaluation, chunked_evaluation])
       self.assertAllEqual(chunked_evaluation_evaled.loss,
                           raw_evaluation_evaled.loss)
       last_chunk_evaluation_state = [
           state[-1, None] for state in
           chunked_evaluation_evaled.end_state]
       for last_chunk_state_member, raw_state_member in zip(
           last_chunk_evaluation_state, raw_evaluation_evaled.end_state):
         self.assertAllEqual(last_chunk_state_member, raw_state_member)
       self.assertAllEqual([[5, 7, 11]],
                           raw_evaluation_evaled.prediction_times)
       for feature_name in raw_evaluation.predictions:
         self.assertAllEqual(
             [1, 3, 1],  # batch, window, num_features. The window size has 2
                         # cut off for the first input_window.
             raw_evaluation_evaled.predictions[feature_name].shape)
         self.assertAllEqual(
             np.reshape(chunked_evaluation_evaled.predictions[feature_name],
                        [-1]),
             np.reshape(raw_evaluation_evaled.predictions[feature_name],
                        [-1]))
       coordinator.request_stop()
       coordinator.join()
Example #13
 def testPandasFeeding(self):
   if not HAS_PANDAS:
     return
   with ops.Graph().as_default():
     array1 = np.arange(32)
     array2 = np.arange(32, 64)
     df = pd.DataFrame({"a": array1, "b": array2}, index=np.arange(64, 96))
     q = ff._enqueue_data(df, capacity=100)
     batch_size = 5
     dq_op = q.dequeue_many(5)
     with session.Session() as sess:
       coord = coordinator.Coordinator()
       threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
       for i in range(100):
         indices = [
             j % array1.shape[0]
             for j in range(batch_size * i, batch_size * (i + 1))
         ]
         expected_df_indices = df.index[indices]
         expected_rows = df.iloc[indices]
         dq = sess.run(dq_op)
         np.testing.assert_array_equal(expected_df_indices, dq[0])
         for col_num, col in enumerate(df.columns):
           np.testing.assert_array_equal(expected_rows[col].values,
                                         dq[col_num + 1])
       coord.request_stop()
       coord.join(threads)
Example #14
 def testPandasFeeding(self):
   if not HAS_PANDAS:
     return
   batch_size = 3
   iterations = 1000
   index = np.arange(100, 132)
   a = np.arange(32)
   b = np.arange(32, 64)
   dataframe = pd.DataFrame({"a": a, "b": b}, index=index)
   pandas_source = in_memory_source.PandasSource(
       dataframe, batch_size=batch_size)
   pandas_columns = pandas_source()
   cache = {}
   with ops.Graph().as_default():
     pandas_tensors = [col.build(cache) for col in pandas_columns]
     with session.Session() as sess:
       coord = coordinator.Coordinator()
       threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
       for i in range(iterations):
         indices = [
             j % dataframe.shape[0]
             for j in range(batch_size * i, batch_size * (i + 1))
         ]
         expected_df_indices = dataframe.index[indices]
         expected_rows = dataframe.iloc[indices]
         actual_value = sess.run(pandas_tensors)
         np.testing.assert_array_equal(expected_df_indices, actual_value[0])
         for col_num, col in enumerate(dataframe.columns):
           np.testing.assert_array_equal(expected_rows[col].values,
                                         actual_value[col_num + 1])
       coord.request_stop()
       coord.join(threads)
Example #15
 def testNumpySource(self):
   batch_size = 3
   iterations = 1000
   array = np.arange(32).reshape([16, 2])
   numpy_source = in_memory_source.NumpySource(array, batch_size=batch_size)
   index_column = numpy_source().index
   value_column = numpy_source().value
   cache = {}
   with ops.Graph().as_default():
     value_tensor = value_column.build(cache)
     index_tensor = index_column.build(cache)
     with session.Session() as sess:
       coord = coordinator.Coordinator()
       threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
       for i in range(iterations):
         expected_index = [
             j % array.shape[0]
             for j in range(batch_size * i, batch_size * (i + 1))
         ]
         expected_value = get_rows(array, expected_index)
         actual_index, actual_value = sess.run([index_tensor, value_tensor])
         np.testing.assert_array_equal(expected_index, actual_index)
         np.testing.assert_array_equal(expected_value, actual_value)
       coord.request_stop()
       coord.join(threads)
Example #16
def verify_tfrecord_image(dataset_dir, create_input_fn, channels=3):
    import matplotlib.pyplot as plt
    import tensorflow as tf
    from tensorflow.python.training import coordinator
    from tensorflow.python.training import queue_runner_impl

    def details(img, label):
        print('------image: {}'.format(label))
        plt.imshow(img)
        plt.show()

    create_input_fns = create_input_fn(dataset_dir)

    for input_fn in create_input_fns:
        with tf.Session() as session:
            image, label = input_fn()
            coord = coordinator.Coordinator()
            threads = queue_runner_impl.start_queue_runners(session, coord=coord)
            img, lab = session.run([image['image'], label['label']])

            print('Train data {}'.format(img.shape))
            for i in range(3):
                details(img[i, :, :, :] if channels > 1 else img[i, :, :, 0], lab[i])

            coord.request_stop()
            coord.join(threads)
Example #17
  def testNumpyInputFnWithYAsDict(self):
    a = np.arange(4) * 1.0
    b = np.arange(32, 36)
    x = {'a': a, 'b': b}
    y = {'y1': np.arange(-32, -28), 'y2': np.arange(32, 28, -1)}

    with self.test_session() as session:
      input_fn = numpy_io.numpy_input_fn(
          x, y, batch_size=2, shuffle=False, num_epochs=1)
      features_tensor, targets_tensor = input_fn()

      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(session, coord=coord)

      features, targets = session.run([features_tensor, targets_tensor])
      self.assertEqual(len(features), 2)
      self.assertAllEqual(features['a'], [0, 1])
      self.assertAllEqual(features['b'], [32, 33])
      self.assertEqual(len(targets), 2)
      self.assertAllEqual(targets['y1'], [-32, -31])
      self.assertAllEqual(targets['y2'], [32, 31])

      session.run([features_tensor, targets_tensor])
      with self.assertRaises(errors.OutOfRangeError):
        session.run([features_tensor, targets_tensor])

      coord.request_stop()
      coord.join(threads)
Example #18
  def testNumpyInputFn(self):
    a = np.arange(4) * 1.0
    b = np.arange(32, 36)
    x = {'a': a, 'b': b}
    y = np.arange(-32, -28)

    with self.test_session() as session:
      input_fn = numpy_io.numpy_input_fn(
          x, y, batch_size=2, shuffle=False, num_epochs=1)
      features, target = input_fn()

      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(session, coord=coord)

      res = session.run([features, target])
      self.assertAllEqual(res[0]['a'], [0, 1])
      self.assertAllEqual(res[0]['b'], [32, 33])
      self.assertAllEqual(res[1], [-32, -31])

      session.run([features, target])
      with self.assertRaises(errors.OutOfRangeError):
        session.run([features, target])

      coord.request_stop()
      coord.join(threads)
Example #19
  def test_linear_model_numpy_input_fn(self):
    price = fc.numeric_column('price')
    price_buckets = fc.bucketized_column(price, boundaries=[0., 10., 100.,])
    body_style = fc.categorical_column_with_vocabulary_list(
        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])

    input_fn = numpy_io.numpy_input_fn(
        x={
            'price': np.array([-1., 2., 13., 104.]),
            'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']),
        },
        batch_size=2,
        shuffle=False)
    features = input_fn()
    net = fc.linear_model(features, [price_buckets, body_style])
    # self.assertEqual(1 + 3 + 5, net.shape[1])
    with self._initialized_session() as sess:
      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(sess, coord=coord)

      bias = self._get_linear_model_bias()
      price_buckets_var = self._get_linear_model_column_var(price_buckets)
      body_style_var = self._get_linear_model_column_var(body_style)

      sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]]))
      sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]]))
      sess.run(bias.assign([5.]))

      self.assertAllClose([[10 - 1000 + 5.], [100 - 10 + 5.]], sess.run(net))

      coord.request_stop()
      coord.join(threads)
Example #20
    def _test(self):
      rng = np.arange(
          -NUMPY_ARRAY_SIZE // 2, NUMPY_ARRAY_SIZE // 2, dtype="float32")

      frame = df.TensorFlowDataFrame.from_numpy(
          rng, batch_size=len(rng), shuffle=False)

      frame["sqr"] = frame["value"].square()

      self.assertTrue(hasattr(frame["value"], fn_name))

      frame["series_result"] = getattr(frame["value"], fn_name)(frame["sqr"])
      frame["scalar_result"] = getattr(frame["value"], fn_name)(SCALAR)

      frame_built = frame.build()

      expected_series_tensor = op(frame_built["value"], frame_built["sqr"])
      actual_series_tensor = frame_built["series_result"]

      expected_scalar_tensor = op(frame_built["value"], SCALAR)
      actual_scalar_tensor = frame_built["scalar_result"]

      session = session_lib.Session()
      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(sess=session, coord=coord)
      actual_series, expected_series, actual_scalar, expected_scalar = (
          session.run([
              actual_series_tensor, expected_series_tensor,
              actual_scalar_tensor, expected_scalar_tensor
          ]))
      coord.request_stop()
      coord.join(threads)
      np.testing.assert_almost_equal(expected_series, actual_series)
      np.testing.assert_almost_equal(expected_scalar, actual_scalar)
Example #21
  def testNumpyInputFnWithYIsNone(self):
    a = np.arange(4) * 1.0
    b = np.arange(32, 36)
    x = {'a': a, 'b': b}
    y = None

    with self.test_session() as session:
      input_fn = numpy_io.numpy_input_fn(
          x, y, batch_size=2, shuffle=False, num_epochs=1)
      features_tensor = input_fn()

      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(session, coord=coord)

      feature = session.run(features_tensor)
      self.assertEqual(len(feature), 2)
      self.assertAllEqual(feature['a'], [0, 1])
      self.assertAllEqual(feature['b'], [32, 33])

      session.run([features_tensor])
      with self.assertRaises(errors.OutOfRangeError):
        session.run([features_tensor])

      coord.request_stop()
      coord.join(threads)
Example #22
  def testNumpyInputFnWithBatchSizeNotDividedByDataSize(self):
    batch_size = 2
    a = np.arange(5) * 1.0
    b = np.arange(32, 37)
    x = {'a': a, 'b': b}
    y = np.arange(-32, -27)

    with self.test_session() as session:
      input_fn = numpy_io.numpy_input_fn(
          x, y, batch_size=batch_size, shuffle=False, num_epochs=1)
      features, target = input_fn()

      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(session, coord=coord)

      res = session.run([features, target])
      self.assertAllEqual(res[0]['a'], [0, 1])
      self.assertAllEqual(res[0]['b'], [32, 33])
      self.assertAllEqual(res[1], [-32, -31])

      res = session.run([features, target])
      self.assertAllEqual(res[0]['a'], [2, 3])
      self.assertAllEqual(res[0]['b'], [34, 35])
      self.assertAllEqual(res[1], [-30, -29])

      res = session.run([features, target])
      self.assertAllEqual(res[0]['a'], [4])
      self.assertAllEqual(res[0]['b'], [36])
      self.assertAllEqual(res[1], [-28])

      with self.assertRaises(errors.OutOfRangeError):
        session.run([features, target])

      coord.request_stop()
      coord.join(threads)
Example #23
 def testBatch(self):
   initial_batch_size = 7
   final_batch_size = 13
   iterations = 50
   numpy_cols = in_memory_source.NumpySource(
       np.arange(1000, 2000), batch_size=initial_batch_size)()
   index_column = numpy_cols.index
   value_column = numpy_cols.value
   batcher = batch.Batch(
       batch_size=final_batch_size, output_names=["index", "value"])
   batched = batcher([index_column, value_column])
   cache = {}
   index_tensor = batched.index.build(cache)
   value_tensor = batched.value.build(cache)
   with self.test_session() as sess:
     coord = coordinator.Coordinator()
     threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
     for i in range(iterations):
       expected_index = range(i * final_batch_size, (i + 1) * final_batch_size)
       expected_value = range(1000 + i * final_batch_size,
                              1000 + (i + 1) * final_batch_size)
       actual_index, actual_value = sess.run([index_tensor, value_tensor])
       np.testing.assert_array_equal(expected_index, actual_index)
       np.testing.assert_array_equal(expected_value, actual_value)
     coord.request_stop()
     coord.join(threads)
Example #24
  def test_keyed_features_filter(self):
    gfile.Glob = self._orig_glob
    lines = [
        '{"features": {"feature": {"age": {"int64_list": {"value": [2]}}}}}',
        '{"features": {"feature": {"age": {"int64_list": {"value": [0]}}}}}',
        '{"features": {"feature": {"age": {"int64_list": {"value": [1]}}}}}',
        '{"features": {"feature": {"age": {"int64_list": {"value": [0]}}}}}',
        '{"features": {"feature": {"age": {"int64_list": {"value": [3]}}}}}',
        '{"features": {"feature": {"age": {"int64_list": {"value": [5]}}}}}'
    ]
    filename = self._create_temp_file("\n".join(lines))

    batch_size = 2
    queue_capacity = 4
    name = "my_batch"
    features = {"age": parsing_ops.FixedLenFeature([], dtypes_lib.int64)}

    def filter_fn(keys, examples_json):
      del keys
      serialized = parsing_ops.decode_json_example(examples_json)
      examples = parsing_ops.parse_example(serialized, features)
      return math_ops.less(examples["age"], 2)

    with ops.Graph().as_default() as g, self.session(graph=g) as session:
      keys, inputs = graph_io._read_keyed_batch_examples_helper(
          filename,
          batch_size,
          reader=io_ops.TextLineReader,
          randomize_input=False,
          num_epochs=1,
          read_batch_size=batch_size,
          queue_capacity=queue_capacity,
          filter_fn=filter_fn,
          name=name)
      self.assertAllEqual((None,), keys.get_shape().as_list())
      self.assertAllEqual((None,), inputs.get_shape().as_list())
      session.run(variables.local_variables_initializer())

      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
      # First batch of two filtered examples.
      out_keys, out_vals = session.run((keys, inputs))
      self.assertAllEqual(
          [filename.encode("utf-8") + b":2", filename.encode("utf-8") + b":3"],
          out_keys)
      self.assertAllEqual([lines[1].encode("utf-8"), lines[2].encode("utf-8")],
                          out_vals)

      # Second batch will only have one filtered example as that's the only
      # remaining example that satisfies the filtering criterion.
      out_keys, out_vals = session.run((keys, inputs))
      self.assertAllEqual([filename.encode("utf-8") + b":4"], out_keys)
      self.assertAllEqual([lines[3].encode("utf-8")], out_vals)

      # Exhausted input.
      with self.assertRaises(errors.OutOfRangeError):
        session.run((keys, inputs))

      coord.request_stop()
      coord.join(threads)
Example #25
  def testGeneratorInputFn(self):

    def generator():
      for index in range(2):
        yield {
            'a': np.ones(1) * index,
            'b': np.ones(1) * index + 32,
            'label': np.ones(1) * index - 32
        }

    with self.cached_session() as session:
      input_fn = generator_io.generator_input_fn(
          generator,
          target_key='label',
          batch_size=2,
          shuffle=False,
          num_epochs=1)
      features, target = input_fn()

      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(session, coord=coord)

      res = session.run([features, target])
      self.assertAllEqual(res[0]['a'], np.asarray([0, 1]).reshape(-1, 1))
      self.assertAllEqual(res[0]['b'], np.asarray([32, 33]).reshape(-1, 1))
      self.assertAllEqual(res[1], np.asarray([-32, -31]).reshape(-1, 1))

      session.run([features])
      with self.assertRaises(errors.OutOfRangeError):
        session.run([features, target])

      coord.request_stop()
      coord.join(threads)
Example #26
  def testGeneratorInputFnWithDifferentDimensionsOfFeatures(self):

    def generator():
      for index in range(100):
        yield {
            'a': np.ones((10, 10)) * index,
            'b': np.ones((5, 5)) * index + 32,
            'label': np.ones((3, 3)) * index - 32
        }

    with self.cached_session() as session:
      input_fn = generator_io.generator_input_fn(
          generator,
          target_key='label',
          batch_size=2,
          shuffle=False,
          num_epochs=1)
      features, target = input_fn()

      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(session, coord=coord)

      res = session.run([features, target])
      self.assertAllEqual(res[0]['a'],
                          np.vstack((np.zeros((10, 10)), np.ones(
                              (10, 10)))).reshape(2, 10, 10))
      self.assertAllEqual(res[0]['b'],
                          np.vstack((np.zeros((5, 5)), np.ones(
                              (5, 5)))).reshape(2, 5, 5) + 32)
      self.assertAllEqual(res[1],
                          np.vstack((np.zeros((3, 3)), np.ones(
                              (3, 3)))).reshape(2, 3, 3) - 32)

      coord.request_stop()
      coord.join(threads)
Example #27
 def testNotAMultiple(self):
   num_unroll = 3  # Not a divisor of value_length -
   # so padding would have been necessary.
   with self.test_session() as sess:
     with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                  ".*should be a multiple of: 3, but saw "
                                  "value: 4. Consider setting pad=True."):
       coord = coordinator.Coordinator()
       threads = None
       try:
         with coord.stop_on_exception():
           next_batch = sqss.batch_sequences_with_states(
               input_key=self.key,
               input_sequences=self.sequences,
               input_context=self.context,
               input_length=3,
               initial_states=self.initial_states,
               num_unroll=num_unroll,
               batch_size=self.batch_size,
               num_threads=3,
               # to enforce that we only move on to the next examples after
               # finishing all segments of the first ones.
               capacity=2,
               pad=False)
           threads = queue_runner_impl.start_queue_runners(coord=coord)
           sess.run([next_batch.key])
       except errors_impl.OutOfRangeError:
         pass
       finally:
         coord.request_stop()
         if threads is not None:
           coord.join(threads, stop_grace_period_secs=2)
Example #28
  def testPandasInputFn_ProducesOutputsForLargeBatchAndMultipleEpochs(self):
    if not HAS_PANDAS:
      return
    with self.test_session() as session:
      index = np.arange(100, 102)
      a = np.arange(2)
      b = np.arange(32, 34)
      x = pd.DataFrame({'a': a, 'b': b}, index=index)
      y = pd.Series(np.arange(-32, -30), index=index)
      input_fn = pandas_io.pandas_input_fn(
          x, y, batch_size=128, shuffle=False, num_epochs=2)

      results = input_fn()

      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(session, coord=coord)

      features, target = session.run(results)
      self.assertAllEqual(features['a'], [0, 1, 0, 1])
      self.assertAllEqual(features['b'], [32, 33, 32, 33])
      self.assertAllEqual(target, [-32, -31, -32, -31])

      with self.assertRaises(errors.OutOfRangeError):
        session.run(results)

      coord.request_stop()
      coord.join(threads)
Example #29
  def test_batch_text_lines(self):
    gfile.Glob = self._orig_glob
    filename = self._create_temp_file("A\nB\nC\nD\nE\n")

    batch_size = 3
    queue_capacity = 10
    name = "my_batch"

    with ops.Graph().as_default() as g, self.test_session(graph=g) as session:
      inputs = graph_io.read_batch_examples(
          [filename],
          batch_size,
          reader=io_ops.TextLineReader,
          randomize_input=False,
          num_epochs=1,
          queue_capacity=queue_capacity,
          read_batch_size=10,
          name=name)
      self.assertAllEqual((None,), inputs.get_shape().as_list())
      session.run(variables.local_variables_initializer())

      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(session, coord=coord)

      self.assertAllEqual(session.run(inputs), [b"A", b"B", b"C"])
      self.assertAllEqual(session.run(inputs), [b"D", b"E"])
      with self.assertRaises(errors.OutOfRangeError):
        session.run(inputs)

      coord.request_stop()
      coord.join(threads)
Example #30
  def testGeneratorInputFnWithMismatchinGeneratorKeys(self):

    def generator():
      index = 0
      yield {
          'a': np.ones(1) * index,
          'b': np.ones(1) * index + 32,
          'label': np.ones(1) * index - 32
      }
      index = 1
      yield {
          'a': np.ones(1) * index,
          'c': np.ones(1) * index + 32,
          'label': np.ones(1) * index - 32
      }

    with self.cached_session() as session:
      input_fn = generator_io.generator_input_fn(
          generator, target_key=None, batch_size=2, shuffle=False, num_epochs=1)
      features = input_fn()

      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(session, coord=coord)

      with self.assertRaises(errors.OutOfRangeError):
        session.run([features])

      with self.assertRaisesRegex(KeyError, 'key mismatch between dicts emitted'
                                  ' by GenFunExpected'):
        coord.request_stop()
        coord.join(threads)
Example #31
 def start_queue_runners(self, sess):
   # Store session to be able to close inputs later
   if self._sess is None:
     self._sess = sess
   self._threads = queue_runner_impl.start_queue_runners(coord=self._coord)
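
Because this snippet omits the sess argument, start_queue_runners falls back to the current default session. A minimal sketch of that fallback, assuming TensorFlow 1.x; the toy queue is illustrative, not taken from the snippet above:

import tensorflow as tf

queue = tf.FIFOQueue(capacity=4, dtypes=tf.int32)
tf.train.add_queue_runner(
    tf.train.QueueRunner(queue, [queue.enqueue(tf.constant(7))]))

coord = tf.train.Coordinator()
with tf.Session() as sess:  # entering the block installs sess as the default session
    threads = tf.train.start_queue_runners(coord=coord)  # picks up the default session
    print(sess.run(queue.dequeue()))
    coord.request_stop()
    coord.join(threads)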
Example #32
  def _testBucketBySequenceLength(self,
                                  allow_small_batch,
                                  bucket_capacities=None,
                                  drain_entire_queue=True):
    ops.reset_default_graph()

    # All inputs must be identical lengths across tuple index.
    # The input reader will get input_length from the first tuple
    # entry.
    data_len = 4
    labels_len = 3
    input_pairs = [(length, ([np.int64(length)] * data_len,
                             [str(length).encode("ascii")] * labels_len))
                   for length in (1, 3, 4, 5, 6, 10)]

    lengths = array_ops.placeholder(dtypes_lib.int32, ())
    data = array_ops.placeholder(dtypes_lib.int64, (data_len,))
    labels = array_ops.placeholder(dtypes_lib.string, (labels_len,))

    batch_size = 8
    bucket_boundaries = [3, 4, 5, 10]
    num_pairs_to_enqueue = 50 * batch_size + 100

    # Make capacity very large so we can feed all the inputs in the
    # main thread without blocking
    input_queue = data_flow_ops.FIFOQueue(
        5000, (dtypes_lib.int32, dtypes_lib.int64, dtypes_lib.string), (
            (), (data_len,), (labels_len,)))
    input_enqueue_op = input_queue.enqueue((lengths, data, labels))
    lengths_t, data_t, labels_t = input_queue.dequeue()
    close_input_op = input_queue.close()

    (out_lengths_t, data_and_labels_t) = (bucket_ops.bucket_by_sequence_length(
        input_length=lengths_t,
        tensors=[data_t, labels_t],
        batch_size=batch_size,
        bucket_boundaries=bucket_boundaries,
        bucket_capacities=bucket_capacities,
        allow_smaller_final_batch=allow_small_batch,
        num_threads=10))

    expected_batch_size = None if allow_small_batch else batch_size
    self.assertEqual(out_lengths_t.get_shape().as_list(), [expected_batch_size])
    self.assertEqual(data_and_labels_t[0].get_shape().as_list(),
                     [expected_batch_size, data_len])
    self.assertEqual(data_and_labels_t[1].get_shape().as_list(),
                     [expected_batch_size, labels_len])

    def _read_test(sess):
      num_pairs_dequeued = 0
      try:
        while drain_entire_queue or num_pairs_dequeued < 40 * batch_size:
          (out_lengths, (data, labels)) = sess.run(
              (out_lengths_t, data_and_labels_t))
          num_pairs_dequeued += out_lengths.shape[0]
          if allow_small_batch:
            self.assertEqual(data_len, data.shape[1])
            self.assertEqual(labels_len, labels.shape[1])
            self.assertGreaterEqual(batch_size, out_lengths.shape[0])
            self.assertGreaterEqual(batch_size, data.shape[0])
            self.assertGreaterEqual(batch_size, labels.shape[0])
          else:
            self.assertEqual((batch_size, data_len), data.shape)
            self.assertEqual((batch_size, labels_len), labels.shape)
            self.assertEqual((batch_size,), out_lengths.shape)
          for (lr, dr, tr) in zip(out_lengths, data, labels):
            # Make sure length matches data (here it's the same value).
            self.assertEqual(dr[0], lr)
            # Make sure data & labels match.
            self.assertEqual(dr[0], int(tr[0].decode("ascii")))
            # Make sure for each row, data came from the same bucket.
            self.assertEqual(
                _which_bucket(bucket_boundaries, dr[0]),
                _which_bucket(bucket_boundaries, dr[1]))
      except errors.OutOfRangeError:
        if allow_small_batch:
          self.assertEqual(num_pairs_to_enqueue, num_pairs_dequeued)
        else:
          # Maximum left over in the queues should be at most one less than the
          # batch_size, for every bucket.
          num_buckets = len(bucket_boundaries) + 2
          self.assertLessEqual(
              num_pairs_to_enqueue - (batch_size - 1) * num_buckets,
              num_pairs_dequeued)

    with self.cached_session() as sess:
      coord = coordinator.Coordinator()

      # Feed the inputs, then close the input thread.
      for _ in range(num_pairs_to_enqueue):
        which = random.randint(0, len(input_pairs) - 1)
        length, pair = input_pairs[which]
        sess.run(input_enqueue_op,
                 feed_dict={lengths: length,
                            data: pair[0],
                            labels: pair[1]})
      sess.run(close_input_op)

      # Start the queue runners
      threads = queue_runner_impl.start_queue_runners(coord=coord)
      # Read off the top of the bucket and ensure correctness of output
      _read_test(sess)
      coord.request_stop()
      coord.join(threads)
Example #33
def train():
    vocab_size = len(open(FLAGS.vocab_file).readlines())
    id_to_label = load_id_to_label()
    num_label = len(id_to_label)
    print('#vocab={} #label={}'.format(vocab_size, num_label))

    parse_spec = get_parse_spec(FLAGS.use_ngrams, num_label)
    features = tf.contrib.learn.read_batch_features(
        FLAGS.train_tfrecord,
        FLAGS.batch_size,
        parse_spec,
        tf.TFRecordReader,
        num_epochs=FLAGS.num_epochs,
        reader_num_threads=FLAGS.num_threads)
    text_ts = tf.sparse_tensor_to_dense(features[TEXT_KEY],
                                        default_value=DEFAULT_WORD)
    label_ts = features.pop(LABELS_KEY)

    # text_ph = tf.placeholder(tf.string, shape=(None, None))
    text_ph = tf.placeholder(tf.int64, shape=(None, None))
    label_ph = tf.placeholder(tf.float32, shape=(None, num_label))
    # text_lookup_table = tf.contrib.lookup.index_table_from_file(
    #         FLAGS.vocab_file, FLAGS.num_oov_vocab_buckets, vocab_size)
    # text_ids = text_lookup_table.lookup(text_ph)
    text_ids = text_ph
    # text_embedding_w = tf.Variable(tf.random_uniform([vocab_size + FLAGS.num_oov_vocab_buckets, FLAGS.embedding_dimension], -0.1, 0.1))
    text_embedding_w = tf.Variable(
        tf.random_uniform([vocab_size + 1, FLAGS.embedding_dimension], -0.1,
                          0.1))
    text_embedding = tf.reduce_mean(tf.nn.embedding_lookup(
        text_embedding_w, text_ids),
                                    axis=-2)
    input_layer = text_embedding
    logits_ts = tf.contrib.layers.fully_connected(inputs=input_layer,
                                                  num_outputs=num_label,
                                                  activation_fn=None)
    loss_ts = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=label_ph,
                                                logits=logits_ts))
    optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
    train_op = optimizer.minimize(loss_ts,
                                  global_step=tf.train.get_global_step())
    var_init = tf.global_variables_initializer()
    tab_init = tf.tables_initializer()

    tf.summary.scalar('loss', loss_ts)
    summary_op = tf.summary.merge_all()

    features_v = tf.contrib.learn.read_batch_features(
        FLAGS.valid_tfrecord,
        FLAGS.batch_size,
        parse_spec,
        tf.TFRecordReader,
        num_epochs=1,
        reader_num_threads=FLAGS.num_threads)
    text_ts_v = tf.sparse_tensor_to_dense(features_v[TEXT_KEY],
                                          default_value=DEFAULT_WORD)
    label_ts_v = features_v.pop(LABELS_KEY)

    from tensorflow.python.framework import errors
    from tensorflow.python.ops import variables
    from tensorflow.python.training import coordinator
    from tensorflow.python.training import queue_runner_impl
    with tf.Session() as sess:
        writer = tf.summary.FileWriter(FLAGS.logs_dir,
                                       graph=tf.get_default_graph())

        sess.run(variables.local_variables_initializer())
        coord = coordinator.Coordinator()
        threads = queue_runner_impl.start_queue_runners(sess, coord=coord)
        sess.run(var_init)
        sess.run(tab_init)
        total_size = 0
        try:
            while not coord.should_stop():
                # feature_np, label_np = sess.run([features, label_ts])
                # text_np = feature_np[TEXT_KEY]
                # print(type(text_np), text_np.shape, type(label_np), label_np.shape)
                # for i in range(FLAGS.batch_size):
                #     label_ids = [j for j in range(num_label) if label_np[i,j] != 0]
                #     labels = [id_to_label[label_id] for label_id in label_ids]
                #     text = [text_np[i,j].decode('utf-8') for j in range(text_np.shape[1]) if text_np[i,j] != b' ']
                #     text = ' '.join(text)
                #     print(str(text), labels)
                #     input()
                # input()
                for train_step in range(1000000):
                    text_np, label_np = sess.run([text_ts, label_ts])
                    total_size += FLAGS.batch_size
                    # print(type(text_np), text_np.shape, type(label_np), label_np.shape)
                    # for i in range(FLAGS.batch_size):
                    #     label_ids = [j for j in range(num_label) if label_np[i,j] != 0]
                    #     labels = [id_to_label[label_id] for label_id in label_ids]
                    #     text = [text_np[i,j].decode('utf-8') for j in range(text_np.shape[1]) if text_np[i,j] != b' ']
                    #     text = ' '.join(text)
                    #     print(str(text), labels)
                    #     input()

                    feed_dict = {text_ph: text_np, label_ph: label_np}
                    _, loss, summary = sess.run(
                        [train_op, loss_ts, summary_op], feed_dict=feed_dict)
                    if (train_step + 1) % 100 == 0:
                        writer.add_summary(summary, train_step)
                        print('#{0} loss={1:.4f}'.format(train_step, loss))
        except errors.OutOfRangeError:
            print('total={}'.format(total_size))
            cutoff = 3
            prec_v, rec_v = [], []
            for valid_step in range(int(2000 / FLAGS.batch_size)):
                text_np, label_np = sess.run([text_ts_v, label_ts_v])
                feed_dict = {text_ph: text_np, label_ph: label_np}
                logits, = sess.run([logits_ts], feed_dict=feed_dict)
                prec_bt = precision(logits, label_np, cutoff)
                prec_v.append(prec_bt)
                rec_bt = recall(logits, label_np, cutoff)
                rec_v.append(rec_bt)
            prec_v, rec_v = np.mean(prec_v), np.mean(rec_v)
            print('prec={0:.4f} rec={1:.4f}'.format(prec_v, rec_v))
        finally:
            coord.request_stop()

        coord.join(threads)
Example #34
def train(model, input_dims, output_dims, seq_length, size, num_gpus, dataset,
          experiment_name, load_model, num_vids, n_epochs, split,
          base_data_path, f_name, learning_rate_init, wd, save_freq,
          clip_length, video_offset, clip_offset, num_clips, clip_stride,
          batch_size, loss_type, metrics_dir, loaded_checkpoint, verbose,
          opt_choice, gpu_list, grad_clip_value, preproc_method, random_init,
          shuffle_seed, preproc_debugging, reverse):
    """
    Training function used to train or fine-tune a chosen model
    Args:
        :model:              tf-activity-recognition framework model object
        :input_dims:         Number of frames used in input
        :output_dims:        Integer number of classes in current dataset
        :seq_length:         Length of output sequence expected from LSTM
        :size:               List detailing height and width of frame
        :num_gpus:           Number of gpus to use when training
        :dataset:            Name of dataset being processed
        :experiment_name:    Name of current experiment
        :load_model:         Boolean variable indicating whether to load from a checkpoint or not
        :num_vids:           Number of videos to be used for training
        :n_epochs:           Total number of epochs to train
        :split:              Split of dataset being used
        :base_data_path:     Full path to root directory containing datasets
        :f_name:             Specific video directory within a chosen split of a dataset
        :learning_rate_init: Initializer for learning rate
        :wd:                 Weight decay
        :save_freq:          Frequency, in epochs, with which to save
        :clip_length:        Length of clips to cut video into, -1 indicates using the entire video as one clip
        :video_offset:       String indicating where to begin selecting video clips (provided clipOffset is None)
        :clip_offset:        "none" or "random" indicating where to begin selecting video clips
        :num_clips:          Number of clips to break video into
        :clip_stride:        Number of frames that overlap between clips, 0 indicates no overlap and negative values indicate a gap of frames between clips
        :batch_size:         Number of clips to load into the model each step.
        :loss_type:          String declaring loss type associated with a chosen model
        :metrics_dir:        Name of subdirectory within the experiment to store metrics. Unique directory names allow for parallel testing
        :loaded_checkpoint:  Specify the exact checkpoint of saved model to be loaded for further training/testing
        :verbose:            Boolean to indicate whether all print statements should be processed or not
        :opt_choice:         String indicating optimizer selected
        :gpu_list:           List of GPU IDs to be used
        :grad_clip_value:    Float value at which to clip normalized gradients
        :lr_boundaries:      List of epoch boundaries at which lr will be updated
        :lr_values:          List of lr multipliers to learning_rate_init at boundaries mentioned in lr_boundaries
        :preproc_method:     The preprocessing method to use, default, cvr, rr, sr, or any other custom preprocessing
        :random_init:        Randomly initialize model weights, not loading from any files (default False)
        :preproc_debugging:  Boolean indicating whether to load videos and clips in a queue or to load them directly for debugging (Default 0)
        :reverse:            Boolean indicating whether reverse videos and classify them as a new action class.

    Returns:
        Does not return anything
    """

    with tf.name_scope("my_scope") as scope:

        # Initializers for checkpoint and global step variable
        ckpt = None
        gs_init = 0

        ################################### Checkpoint loading block #######################################################

        # Load pre-trained/saved model to continue training (or fine-tune)
        if load_model:
            try:
                ckpt, gs_init, learning_rate_init = load_checkpoint(
                    model.name, dataset, experiment_name, loaded_checkpoint,
                    preproc_method)
                if verbose:
                    print('A better checkpoint is found. The global_step '
                          'value is: ' + str(gs_init))

            except:
                if verbose:
                    print("Failed loading checkpoint requested. Please check.")
                exit()

            # END TRY
        else:
            ckpt = model.load_default_weights()

        # END IF

        ######################################################################################################################

        # Initialize model variables
        global_step = tf.Variable(gs_init, name='global_step', trainable=False)
        number_of_videos = tf.Variable(num_vids,
                                       name='number_of_videos',
                                       trainable=False)
        number_of_epochs = tf.Variable(n_epochs,
                                       name='number_of_epochs',
                                       trainable=False)
        video_step = tf.Variable(1.0, name='video_step', trainable=False)
        istraining = True
        reuse_variables = None

        # TF session setup
        config = tf.ConfigProto(
            allow_soft_placement=True
        )  #, gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8))
        sess = tf.Session(config=config)
        init = tf.global_variables_initializer()

        # Variables get randomly initialized into tf graph
        sess.run(init)

        tower_losses = []
        tower_grads = []
        tower_slogits = []

        data_path = os.path.join(base_data_path, 'tfrecords_' + dataset,
                                 'Split' + str(split), f_name)

        # Setup tensors for models
        # input_data_tensor - [batchSize, inputDims, height, width, channels]
        input_data_tensor, labels_tensor, names_tensor = load_dataset(
            model,
            num_gpus,
            batch_size,
            output_dims,
            input_dims,
            seq_length,
            size,
            data_path,
            dataset,
            istraining,
            clip_length,
            video_offset,
            clip_offset,
            num_clips,
            clip_stride,
            video_step,
            preproc_debugging,
            shuffle_seed,
            verbose,
            reverse=reverse)

        ############### TO DO: FIX THIS ASAP ########################
        if ((batch_size == 1) and (num_clips == 1)):
            sess.run(tf.assign_add(video_step, -2))

        else:
            sess.run(tf.assign_add(video_step, -1))

        # END IF
        ############################################################

        learning_rate = tf.Variable(learning_rate_init,
                                    name='learning_rate',
                                    trainable=False)

        # Define optimizer (Current selection is only momentum optimizer)
        if opt_choice == 'gd':
            optimizer = lambda lr: tf.train.GradientDescentOptimizer(lr)

        elif opt_choice == 'adam':
            optimizer = lambda lr: tf.train.AdamOptimizer(lr)

        else:
            optimizer = lambda lr: tf.train.MomentumOptimizer(learning_rate=lr,
                                                              momentum=0.9)

        # END IF
        """ Multi-GPU setup: 1) Associate gpu device to specific model replica
                             2) Setup tower name scope for variables
        """

        ################# GPU list check block ####################

        assert ((len(gpu_list) == num_gpus) or (len(gpu_list) == 0))

        if len(gpu_list) == 0:
            gpu_list = [str(x) for x in range(num_gpus)]

        # END IF

        ###########################################################

        ################################################## Setup TF graph block ######################################################
        for gpu_idx in range(num_gpus):
            with tf.device('/gpu:' + str(gpu_list[gpu_idx])):
                with tf.name_scope('%s_%d' %
                                   ('tower', int(gpu_list[gpu_idx]))) as scope:
                    with tf.variable_scope(tf.get_variable_scope(),
                                           reuse=reuse_variables):
                        returned_layers = model.inference(
                            input_data_tensor[gpu_idx *
                                              batch_size:gpu_idx * batch_size +
                                              batch_size, :, :, :, :],
                            istraining,
                            input_dims,
                            output_dims,
                            seq_length,
                            scope,
                            return_layer=['logits'],
                            weight_decay=wd)

                        logits = tf.cast(returned_layers[0], tf.float32)

                        # Calculating Softmax for probability outcomes : Can be modified, make function internal to model
                        slogits = tf.nn.softmax(logits)

                    # END WITH

                    reuse_variables = True
                    """ Within GPU mini-batch: 1) Calculate loss,
                                               2) Initialize optimizer with required learning rate and
                                               3) Compute gradients
                                               4) Aggregate losses, gradients and logits
                    """

                    total_loss = model.loss(
                        logits, labels_tensor[gpu_idx *
                                              batch_size:gpu_idx * batch_size +
                                              batch_size, :], loss_type)
                    opt = optimizer(learning_rate)
                    gradients = opt.compute_gradients(
                        total_loss, vars_.trainable_variables())

                    tower_losses.append(total_loss)
                    tower_grads.append(gradients)
                    tower_slogits.append(slogits)

                # END WITH

            # END WITH

        # END FOR
        """  After: 1) Computing gradients and losses need to be stored and averaged
                    2) Clip gradients by norm to required value
                    3) Apply mean gradient updates
        """

        gradients = _average_gradients(tower_grads)
        gradients, variables = zip(*gradients)
        clipped_gradients, _ = clip_ops.clip_by_global_norm(
            gradients, grad_clip_value)
        gradients = list(zip(clipped_gradients, variables))
        grad_updates = opt.apply_gradients(gradients,
                                           global_step=global_step,
                                           name="train")
        train_op = grad_updates

        ############################################################################################################################################

        if save_bool:
            ######################### Logger Setup block ######################################

            # Logging setup initialization (Naming format: Date, month, hour, minute, second)
            log_name = (
                "exp_train_%s_%s_%s" %
                (time.strftime("%d_%m_%H_%M_%S"), dataset, experiment_name))
            make_dir('results')
            make_dir(os.path.join('results', model.name))
            make_dir(os.path.join('results', model.name, dataset))
            make_dir(
                os.path.join('results', model.name, dataset, preproc_method))
            make_dir(
                os.path.join('results', model.name, dataset, preproc_method,
                             experiment_name))
            make_dir(
                os.path.join('results', model.name, dataset, preproc_method,
                             experiment_name, 'checkpoints'))
            curr_logger = Logger(
                os.path.join('logs', model.name, dataset, preproc_method,
                             metrics_dir, log_name))

            ####################################################################################

        # END IF

        init = tf.global_variables_initializer()
        coord = tf.train.Coordinator()
        threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)

        # Variables get randomly initialized into tf graph
        sess.run(init)

        # Check that weights were loaded or random initializations are requested
        if ckpt is None or random_init:
            print("Caution: Model weights are not being loaded; using random initialization.")

        else:
            # Model variables initialized from previous saved models
            initialize_from_dict(sess, ckpt, model.name)

        # END IF

        del ckpt

        # Initialize tracking variables
        previous_vid_name = ""
        videos_loaded = 0
        tot_count = 0
        acc = 0
        epoch_count = 0
        tot_load_time = 0.0
        tot_train_time = 0.0
        last_loss = None

        losses = []
        total_pred = []
        save_data = []
        total_params = []
        losses_tracker = []

        # Timing test setup
        time_init = time.time()

        batch_count = 0
        epoch_acc = 0
        l_r = learning_rate_init

        ########################################## Training loop block ################################################################

        # Loop epoch number of time over the training set
        while videos_loaded < n_epochs * num_vids:
            # Variable to update during epoch intervals
            if (epoch_count + 1) * num_vids <= videos_loaded < (
                    epoch_count + 1) * num_vids + num_gpus * batch_size:
                batch_count = 0
                epoch_acc = 0

                if epoch_count % save_freq == 0 and tot_count > 0:
                    if save_bool:
                        if verbose:
                            print "Saving..."

                        save_checkpoint(sess, model.name, dataset,
                                        experiment_name, preproc_method, l_r,
                                        global_step.eval(session=sess))

                # END IF

                epoch_count += 1

            # END IF

            time_pre_train = time.time()

            ######################################### Running TF training session block ##################################
            _, loss_train, predictions, gs, labels, vid_names, l_r, track_vars = sess.run(
                [
                    train_op, tower_losses, tower_slogits, global_step,
                    labels_tensor, names_tensor, learning_rate,
                    model.get_track_variables()
                ])

            ################################################################################################################

            if verbose:
                print(vid_names)

            for name in vid_names:
                if name != previous_vid_name:
                    videos_loaded += 1
                    previous_vid_name = name
                tot_count += 1

            ######## Adaptive Learning Rate Control Block ############################

            losses_tracker.append(np.mean(loss_train))

            if videos_loaded % 10 == 0 and videos_loaded > 0:
                if last_loss is None:
                    last_loss = sum(losses_tracker) / 10

                else:
                    difference_loss = last_loss - sum(losses_tracker) / 10
                    last_loss = sum(losses_tracker) / 10

                    if abs(difference_loss) < 0.001:
                        # Update the learning-rate variable in the graph itself;
                        # rebinding the Python name (learning_rate /= 10) would
                        # not change the tf.Variable the optimizer reads.
                        sess.run(learning_rate.assign(learning_rate / 10))

                    # END IF

                # END IF

                if len(losses_tracker) == 10:
                    losses_tracker = []

                # END IF

            # END IF

            ###########################################################################

            # Transpose the tracked layers so the mean is taken across GPUs and over all non-leading dimensions of any tensor with more than one dimension
            params_array = []
            for key in track_vars.keys():
                curr_params = np.array(track_vars[key])
                if len(curr_params.shape) > 1:
                    indices = np.arange(len(curr_params.shape)) + 1
                    indices[-1] = 0
                    curr_params = curr_params.transpose(indices)
                    params_array.append(
                        np.mean(curr_params,
                                axis=tuple(range(len(curr_params.shape))[1:])))

                else:
                    params_array.append([np.mean(curr_params)])

                # END IF

            # END FOR

            #################### Training accuracy computation block ###############

            # Compute training epoch accuracy
            for gpu_pred_idx in range(len(predictions)):
                for batch_idx in range(predictions[gpu_pred_idx].shape[0]):
                    pred = np.mean(predictions[gpu_pred_idx][batch_idx],
                                   0).argmax()

                    if pred == labels[gpu_pred_idx * batch_size +
                                      batch_idx][0]:
                        epoch_acc += 1

                    # END IF

                    batch_count += 1

                # END FOR

            # END FOR

            ###################### Add variables to be tracked to logger #############

            time_post_train = time.time()
            tot_train_time += time_post_train - time_pre_train

            if verbose:
                print('train_time: ', time_post_train - time_pre_train)
                print('step, loss: ', gs, loss_train)
                print('labels: ', labels)

            # END IF

            if save_bool:
                curr_logger.add_scalar_value('train/train_time',
                                             time_post_train - time_pre_train,
                                             step=gs)
                curr_logger.add_scalar_value('train/loss',
                                             float(np.mean(loss_train)),
                                             step=gs)
                curr_logger.add_scalar_value('train/epoch_acc',
                                             epoch_acc / float(batch_count),
                                             step=gs)

                for layer in range(len(params_array)):
                    for p in range(len(params_array[layer])):
                        curr_logger.add_scalar_value(
                            'tracked_training_variables/' +
                            list(track_vars.keys())[layer] + '_' + str(p),
                            float(params_array[layer][p]),
                            step=gs)

                    # END FOR

                # END FOR

                total_params.append(params_array)

                curr_logger.add_scalar_value(
                    'tracked_training_variables/learning_rate',
                    float(l_r),
                    step=gs)

            # END IF

        # END WHILE

        #########################################################################################################################################################

        if save_bool:
            if verbose:
                print "Saving..."

            # END IF

            save_checkpoint(sess, model.name, dataset, experiment_name,
                            preproc_method, l_r, gs)
            coord.request_stop()
            coord.join(threads)

        # END IF

        if verbose:
            print("Tot train time: ", tot_train_time)
            print("Tot time:       ", time.time() - time_init)

        # END IF

        if save_bool:
            # Save tracked parameterization variables as a numpy file
            if len(total_params) != 0:
                total_params = np.array(total_params).flatten()
                params_dir = os.path.join('results', model.name, dataset,
                                          preproc_method, experiment_name,
                                          metrics_dir)
                make_dir(params_dir)
                params_file = os.path.join(params_dir,
                                           'train_params_' + dataset + '.npy')

                # Append to any previously saved training parameters
                if os.path.isfile(params_file):
                    loaded_params = np.load(params_file)
                    total_params = np.concatenate(
                        [loaded_params, total_params])

                # END IF

                np.save(params_file, total_params)

            # END IF

        # END IF

    # END WITH
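
Note: _average_gradients is called in the listing above but its definition is not shown. A minimal sketch of the conventional TF 1.x tower-gradient averaging it presumably implements (an assumption based on the standard multi-GPU pattern, not this framework's actual code):

import tensorflow as tf

def _average_gradients(tower_grads):
    # tower_grads: one list of (gradient, variable) pairs per GPU tower.
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # grad_and_vars pairs the same variable across every tower.
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
        mean_grad = tf.reduce_mean(tf.concat(grads, axis=0), axis=0)
        # Variables are shared between towers, so the first handle suffices.
        average_grads.append((mean_grad, grad_and_vars[0][1]))
    return average_grads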
Ejemplo n.º 35
0
    def test_read_text_lines_large(self):
        gfile.Glob = self._orig_glob
        sequence_prefix = "abcdefghijklmnopqrstuvwxyz123456789"
        num_records = 49999
        lines = [
            "".join([sequence_prefix, str(l)]).encode("ascii")
            for l in range(num_records)
        ]
        json_lines = [
            "".join([
                '{"features": { "feature": { "sequence": {',
                '"bytes_list": { "value": ["',
                base64.b64encode(l).decode("ascii"), '"]}}}}}\n'
            ]) for l in lines
        ]
        filename = self._create_temp_file("".join(json_lines))
        batch_size = 10000
        queue_capacity = 10000
        name = "my_large_batch"

        features = {
            "sequence": parsing_ops.FixedLenFeature([], dtypes_lib.string)
        }

        with ops.Graph().as_default() as g, self.test_session(
                graph=g) as session:
            keys, result = graph_io.read_keyed_batch_features(
                filename,
                batch_size,
                features,
                io_ops.TextLineReader,
                randomize_input=False,
                num_epochs=1,
                queue_capacity=queue_capacity,
                num_enqueue_threads=2,
                parse_fn=parsing_ops.decode_json_example,
                name=name)
            self.assertAllEqual((None, ), keys.get_shape().as_list())
            self.assertEqual(1, len(result))
            self.assertAllEqual((None, ),
                                result["sequence"].get_shape().as_list())
            session.run(variables.local_variables_initializer())
            coord = coordinator.Coordinator()
            threads = queue_runner_impl.start_queue_runners(session,
                                                            coord=coord)

            data = []
            try:
                while not coord.should_stop():
                    data.append(session.run(result))
            except errors.OutOfRangeError:
                pass
            finally:
                coord.request_stop()

            coord.join(threads)

        parsed_records = [
            item for sublist in [d["sequence"] for d in data]
            for item in sublist
        ]
        # Check that the number of records matches expected and all records
        # are present.
        self.assertEqual(len(parsed_records), num_records)
        self.assertEqual(set(parsed_records), set(lines))
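
The decode path this test exercises can be reproduced in isolation. A small hedged sketch using the public TF 1.x API (file reading and batching omitted):

import base64
import tensorflow as tf

json_line = ('{"features": {"feature": {"sequence": {"bytes_list": '
             '{"value": ["' + base64.b64encode(b"abc0").decode("ascii") +
             '"]}}}}}')

with tf.Session() as sess:
    # decode_json_example turns JSON-encoded Examples into binary protos,
    # which parse_example then consumes as usual.
    binary = tf.decode_json_example([json_line])
    parsed = tf.parse_example(
        binary, {"sequence": tf.FixedLenFeature([], tf.string)})
    print(sess.run(parsed["sequence"]))  # [b'abc0']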
Ejemplo n.º 36
0
 def _input_statistics_test_template(self,
                                     stat_object,
                                     num_features,
                                     dtype,
                                     give_full_data,
                                     warmup_iterations=0,
                                     rtol=1e-6,
                                     data_length=500,
                                     chunk_size=4):
     graph = ops.Graph()
     with graph.as_default():
         numpy_dtype = dtype.as_numpy_dtype
         values = (
             (numpy.arange(data_length, dtype=numpy_dtype)[..., None] +
              numpy.arange(num_features, dtype=numpy_dtype)[None,
                                                            ...])[None])
         times = 2 * (numpy.arange(data_length)[None]) - 3
         if give_full_data:
             stat_object.set_data((times, values))
         features = {
             TrainEvalFeatures.TIMES: times,
             TrainEvalFeatures.VALUES: values
         }
         input_fn = input_pipeline.RandomWindowInputFn(
             batch_size=16,
             window_size=chunk_size,
             time_series_reader=input_pipeline.NumpyReader(features))
         statistics = stat_object.initialize_graph(features=input_fn()[0])
         with self.session(graph=graph) as session:
             variables.global_variables_initializer().run()
             coordinator = coordinator_lib.Coordinator()
             queue_runner_impl.start_queue_runners(session,
                                                   coord=coordinator)
             for _ in range(warmup_iterations):
                 # A control dependency should ensure that, for queue-based statistics,
                 # a use of any statistic is preceded by an update of all adaptive
                 # statistics.
                 statistics.total_observation_count.eval()
             self.assertAllClose(
                 numpy.arange(num_features) +
                 numpy.mean(numpy.arange(chunk_size)),
                 statistics.series_start_moments.mean.eval(),
                 rtol=rtol)
             self.assertAllClose(
                 numpy.tile(
                     numpy.var(numpy.arange(chunk_size))[None],
                     [num_features]),
                 statistics.series_start_moments.variance.eval(),
                 rtol=rtol)
             self.assertAllClose(
                 numpy.mean(values[0], axis=0),
                 statistics.overall_feature_moments.mean.eval(),
                 rtol=rtol)
             self.assertAllClose(
                 numpy.var(values[0], axis=0),
                 statistics.overall_feature_moments.variance.eval(),
                 rtol=rtol)
             self.assertAllClose(-3,
                                 statistics.start_time.eval(),
                                 rtol=rtol)
             self.assertAllClose(data_length,
                                 statistics.total_observation_count.eval(),
                                 rtol=rtol)
             coordinator.request_stop()
             coordinator.join()
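
The expected statistics asserted above follow directly from the synthetic ramp data. A numpy-only sketch of the same arithmetic, with illustrative values for the test's parameters:

import numpy as np

data_length, num_features, chunk_size = 500, 2, 4
# values[t, f] = t + f, the same ramp the test constructs.
values = (np.arange(data_length)[:, None] +
          np.arange(num_features)[None, :]).astype(np.float64)

# Moments of each series' first window of length chunk_size:
start_mean = np.arange(num_features) + np.mean(np.arange(chunk_size))
start_var = np.tile(np.var(np.arange(chunk_size)), [num_features])

# Moments over each full series:
overall_mean = np.mean(values, axis=0)
overall_var = np.var(values, axis=0)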
Ejemplo n.º 37
0
    with tf.variable_scope('Loss'):
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=batched_y,
                                                    logits=batched_x))
        optimizer = tf.train.AdamOptimizer(0.1)
        train_op = optimizer.minimize(loss)
        tf.summary.scalar('Loss', loss)

    merged = tf.summary.merge_all()

    t1 = time.time()
    sess = tf.Session()
    checkpoint_path = os.path.join(logdir, scale + '_model')
    writer = tf.summary.FileWriter(logdir, sess.graph)
    sess.run(
        [tf.global_variables_initializer(),
         tf.local_variables_initializer()])
    coord = tf.train.Coordinator()
    threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
    saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2)
    saver.save(sess, checkpoint_path)
    for i in range(num_batches * num_epochs):
        l, _, summary = sess.run([loss, train_op, merged])
        writer.add_summary(summary, i)
        print('batch ' + str(i + 1) + '/' + str(num_batches * num_epochs) +
              '\tLoss: ' + str(l))
    writer.close()
    coord.request_stop()
    coord.join(threads)
    print('Total program time: ' + str(time.time() - t1))
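
Nearly every snippet on this page relies on the same queue-runner lifecycle, and the one above shows how easy it is to get the ordering subtly wrong. A minimal hedged template of the full pattern (TF 1.x):

import tensorflow as tf

# A trivial epoch-limited input queue so the template actually runs.
queue = tf.train.input_producer([1.0, 2.0, 3.0], num_epochs=1)
value = queue.dequeue()

with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(),
              tf.local_variables_initializer()])
    coord = tf.train.Coordinator()
    # Passing coord registers the threads with the coordinator, so
    # request_stop() actually reaches them.
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        while not coord.should_stop():
            print(sess.run(value))
    except tf.errors.OutOfRangeError:
        pass  # Raised once num_epochs is exhausted.
    finally:
        coord.request_stop()
        coord.join(threads)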
Ejemplo n.º 38
0
def _train_on_generated_data(
    generate_fn, generative_model, train_iterations, seed,
    learning_rate=0.1, ignore_params_fn=lambda _: (),
    derived_param_test_fn=lambda _: (),
    train_input_fn_type=input_pipeline.WholeDatasetInputFn,
    train_state_manager=state_management.PassthroughStateManager()):
  """The training portion of parameter recovery tests."""
  random_seed.set_random_seed(seed)
  generate_graph = ops.Graph()
  with generate_graph.as_default():
    with session.Session(graph=generate_graph):
      generative_model.initialize_graph()
      time_series_reader, true_parameters = generate_fn(generative_model)
      true_parameters = {
          tensor.name: value for tensor, value in true_parameters.items()}
  eval_input_fn = input_pipeline.WholeDatasetInputFn(time_series_reader)
  eval_state_manager = state_management.PassthroughStateManager()
  true_parameter_eval_graph = ops.Graph()
  with true_parameter_eval_graph.as_default():
    generative_model.initialize_graph()
    ignore_params = ignore_params_fn(generative_model)
    feature_dict, _ = eval_input_fn()
    eval_state_manager.initialize_graph(generative_model)
    feature_dict[TrainEvalFeatures.VALUES] = math_ops.cast(
        feature_dict[TrainEvalFeatures.VALUES], generative_model.dtype)
    model_outputs = eval_state_manager.define_loss(
        model=generative_model,
        features=feature_dict,
        mode=estimator_lib.ModeKeys.EVAL)
    with session.Session(graph=true_parameter_eval_graph) as sess:
      variables.global_variables_initializer().run()
      coordinator = coordinator_lib.Coordinator()
      queue_runner_impl.start_queue_runners(sess, coord=coordinator)
      true_param_loss = model_outputs.loss.eval(feed_dict=true_parameters)
      true_transformed_params = {
          param: param.eval(feed_dict=true_parameters)
          for param in derived_param_test_fn(generative_model)}
      coordinator.request_stop()
      coordinator.join()

  saving_hook = _SavingTensorHook(
      tensors=true_parameters.keys(),
      every_n_iter=train_iterations - 1)

  class _RunConfig(estimator_lib.RunConfig):

    @property
    def tf_random_seed(self):
      return seed

  estimator = estimators.TimeSeriesRegressor(
      model=generative_model,
      config=_RunConfig(),
      state_manager=train_state_manager,
      optimizer=adam.AdamOptimizer(learning_rate))
  train_input_fn = train_input_fn_type(time_series_reader=time_series_reader)
  trained_loss = (estimator.train(
      input_fn=train_input_fn,
      max_steps=train_iterations,
      hooks=[saving_hook]).evaluate(
          input_fn=eval_input_fn, steps=1))["loss"]
  logging.info("Final trained loss: %f", trained_loss)
  logging.info("True parameter loss: %f", true_param_loss)
  return (ignore_params, true_parameters, true_transformed_params,
          trained_loss, true_param_loss, saving_hook,
          true_parameter_eval_graph)
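
_SavingTensorHook is referenced above but not defined in this excerpt. A plausible minimal sketch under the tf.train.SessionRunHook API, assuming the hook simply records the fetched tensor values every N steps (the real implementation may differ):

import tensorflow as tf

class _SavingTensorHook(tf.train.SessionRunHook):
    """Records the values of the named tensors every `every_n_iter` steps."""

    def __init__(self, tensors, every_n_iter):
        self._tensor_names = list(tensors)
        self._every_n_iter = every_n_iter
        self._iter = 0
        self.tensor_values = {}

    def before_run(self, run_context):
        if self._iter % self._every_n_iter == 0:
            graph = run_context.session.graph
            return tf.train.SessionRunArgs(
                {name: graph.get_tensor_by_name(name)
                 for name in self._tensor_names})
        return None

    def after_run(self, run_context, run_values):
        if run_values.results is not None:
            self.tensor_values.update(run_values.results)
        self._iter += 1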
Ejemplo n.º 39
0
    def _testBasics(self, num_unroll, length, pad, expected_seq1_batch1,
                    expected_seq2_batch1, expected_seq1_batch2,
                    expected_seq2_batch2, expected_seq3_batch1,
                    expected_seq3_batch2, expected_seq4_batch1,
                    expected_seq4_batch2):

        with self.test_session() as sess:
            next_batch = sqss.batch_sequences_with_states(
                input_key=self.key,
                input_sequences=self.sequences,
                input_context=self.context,
                input_length=length,
                initial_states=self.initial_states,
                num_unroll=num_unroll,
                batch_size=self.batch_size,
                num_threads=3,
                # to enforce that we only move on to the next examples after finishing
                # all segments of the first ones.
                capacity=2,
                pad=pad)

            state1 = next_batch.state("state1")
            state2 = next_batch.state("state2")
            state1_update = next_batch.save_state("state1", state1 + 1)
            state2_update = next_batch.save_state("state2", state2 - 1)

            # Make sure queue runner with SQSS is added properly to meta graph def.
            # Saver requires at least one variable.
            v0 = variables.Variable(10.0, name="v0")
            ops.add_to_collection("variable_collection", v0)
            variables.global_variables_initializer()
            save = saver.Saver([v0])
            test_dir = os.path.join(test.get_temp_dir(), "sqss_test")
            filename = os.path.join(test_dir, "metafile")
            meta_graph_def = save.export_meta_graph(filename)
            qr_saved = meta_graph_def.collection_def[
                ops.GraphKeys.QUEUE_RUNNERS]
            self.assertTrue(qr_saved.bytes_list.value is not None)

            coord = coordinator.Coordinator()
            threads = queue_runner_impl.start_queue_runners(coord=coord)

            # Step 1
            (key_value, next_key_value, seq1_value, seq2_value, seq3_value,
             seq4_value, context1_value, state1_value, state2_value,
             length_value, _, _) = sess.run(
                 (next_batch.key, next_batch.next_key,
                  next_batch.sequences["seq1"], next_batch.sequences["seq2"],
                  next_batch.sequences["seq3"], next_batch.sequences["seq4"],
                  next_batch.context["context1"], state1, state2,
                  next_batch.length, state1_update, state2_update))
            expected_first_keys = set([b"00000_of_00002"])
            expected_second_keys = set([b"00001_of_00002"])
            expected_final_keys = set([b"STOP"])

            self.assertEqual(expected_first_keys, self._prefix(key_value))
            self.assertEqual(expected_second_keys,
                             self._prefix(next_key_value))
            self.assertAllEqual(
                np.tile(self.context["context1"], (self.batch_size, 1)),
                context1_value)
            self.assertAllEqual(expected_seq1_batch1, seq1_value)
            self.assertAllEqual(expected_seq2_batch1, seq2_value)
            self.assertAllEqual(expected_seq3_batch1.indices,
                                seq3_value.indices)
            self.assertAllEqual(expected_seq3_batch1.values, seq3_value.values)
            self.assertAllEqual(expected_seq3_batch1.dense_shape,
                                seq3_value.dense_shape)
            self.assertAllEqual(expected_seq4_batch1.indices,
                                seq4_value.indices)
            self.assertAllEqual(expected_seq4_batch1.values, seq4_value.values)
            self.assertAllEqual(expected_seq4_batch1.dense_shape,
                                seq4_value.dense_shape)
            self.assertAllEqual(
                np.tile(self.initial_states["state1"],
                        (self.batch_size, 1, 1)), state1_value)
            self.assertAllEqual(
                np.tile(self.initial_states["state2"], (self.batch_size, 1)),
                state2_value)
            self.assertAllEqual(length_value, [num_unroll, num_unroll])

            # Step 2
            (key_value, next_key_value, seq1_value, seq2_value, seq3_value,
             seq4_value, context1_value, state1_value, state2_value,
             length_value, _, _) = sess.run(
                 (next_batch.key, next_batch.next_key,
                  next_batch.sequences["seq1"], next_batch.sequences["seq2"],
                  next_batch.sequences["seq3"], next_batch.sequences["seq4"],
                  next_batch.context["context1"], state1, state2,
                  next_batch.length, state1_update, state2_update))

            self.assertEqual(expected_second_keys, self._prefix(key_value))
            self.assertEqual(expected_final_keys, self._prefix(next_key_value))
            self.assertAllEqual(
                np.tile(self.context["context1"], (self.batch_size, 1)),
                context1_value)
            self.assertAllEqual(expected_seq1_batch2, seq1_value)
            self.assertAllEqual(expected_seq2_batch2, seq2_value)
            self.assertAllEqual(expected_seq3_batch2.indices,
                                seq3_value.indices)
            self.assertAllEqual(expected_seq3_batch2.values, seq3_value.values)
            self.assertAllEqual(expected_seq3_batch2.dense_shape,
                                seq3_value.dense_shape)
            self.assertAllEqual(expected_seq4_batch2.indices,
                                seq4_value.indices)
            self.assertAllEqual(expected_seq4_batch2.values, seq4_value.values)
            self.assertAllEqual(expected_seq4_batch2.dense_shape,
                                seq4_value.dense_shape)
            self.assertAllEqual(
                1 + np.tile(self.initial_states["state1"],
                            (self.batch_size, 1, 1)), state1_value)
            self.assertAllEqual(
                -1 + np.tile(self.initial_states["state2"],
                             (self.batch_size, 1)), state2_value)
            self.assertAllEqual([1, 1], length_value)

            coord.request_stop()
            coord.join(threads, stop_grace_period_secs=2)
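
The meta-graph check above guards a round trip worth spelling out: importing the saved meta graph restores the serialized queue runners into the QUEUE_RUNNERS collection, where start_queue_runners can find them again. A hedged sketch (the path below is a placeholder for the test's temp file):

import tensorflow as tf

with tf.Graph().as_default():
    # Restores graph structure, including serialized queue runners.
    restorer = tf.train.import_meta_graph('/tmp/sqss_test/metafile')
    runners = tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS)
    print(len(runners))  # > 0 if queue runners were saved with the graph.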
Ejemplo n.º 40
0
def test(model, input_dims, output_dims, seq_length, size, dataset,
         loaded_dataset, experiment_name, num_vids, split, base_data_path,
         f_name, load_model, return_layer, clip_length, video_offset,
         clip_offset, num_clips, clip_stride, metrics_method, batch_size,
         metrics_dir, loaded_checkpoint, verbose, gpu_list, preproc_method,
         loaded_preproc, random_init, avg_clips, use_softmax,
         preproc_debugging, reverse, topk):
    """
    Function used to test the performance and analyse a chosen model
    Args:
        :model:              tf-activity-recognition framework model object
        :input_dims:         Number of frames used in input
        :output_dims:        Integer number of classes in current dataset
        :seq_length:         Length of output sequence expected from LSTM
        :size:               List detailing height and width of frame
        :dataset:            Name of dataset being loaded
        :loaded_dataset:     Name of dataset which was used to train the current model
        :experiment_name:    Name of current experiment
        :num_vids:           Number of videos to be used for testing
        :split:              Split of dataset being used
        :base_data_path:     Full path to root directory containing datasets
        :f_name:             Specific video directory within a chosen split of a dataset
        :load_model:         Boolean variable indicating whether to load from a checkpoint or not
        :return_layer:       Layer to return from the model, used to extract features
        :clip_length:        Length of clips to cut video into, -1 indicates using the entire video as one clip
        :video_offset:       String indicating where to begin selecting video clips (provided clipOffset is None)
        :clip_offset:        "none" or "random" indicating where to begin selecting video clips
        :num_clips:          Number of clips to break video into
        :clip_stride:        Number of frames that overlap between clips, 0 indicates no overlap and negative values indicate a gap of frames between clips
        :metrics_method:     Which method to use to calculate accuracy metrics. ("default" or "svm")
        :batch_size:         Number of clips to load into the model each step.
        :metrics_dir:        Name of subdirectory within the experiment to store metrics. Unique directory names allow for parallel testing
        :loaded_checkpoint:  Specify the exact checkpoint of saved model to be loaded for further training/testing
        :verbose:            Boolean to indicate if all print statements should be processed or not
        :gpu_list:           List of GPU IDs to be used
        :preproc_method:     The preprocessing method to use: default, cvr, rr, sr, or any other custom preprocessing
        :loaded_preproc:     Name of preproc method which was used to train the current model
        :random_init:        Randomly initialize model weights instead of loading them from a file (default False)
        :avg_clips:          Binary boolean indicating whether to average predictions across clips
        :use_softmax:        Binary boolean indicating whether to apply softmax to the inference of the model
        :preproc_debugging:  Boolean indicating whether to load videos and clips in a queue or to load them directly for debugging (default False)
        :reverse:            Boolean indicating whether to reverse videos and classify them as a new action class
        :topk:               Integer k used when computing top-k accuracy metrics

    Returns:
        Does not return anything
    """

    with tf.name_scope("my_scope") as scope:

        # Initializers for checkpoint and global step variable
        ckpt = None
        gs_init = 0

        ################################### Checkpoint loading block #######################################################

        # Load pre-trained/saved model
        if load_model:
            try:
                ckpt, gs_init, learning_rate_init = load_checkpoint(
                    model.name, loaded_dataset, experiment_name,
                    loaded_checkpoint, loaded_preproc)
                if verbose:
                    print('A checkpoint was found. The global_step value is: '
                          + str(gs_init))

            except Exception:
                if verbose:
                    print("Failed to load the requested checkpoint. Please check.")
                exit()

            # END TRY

        else:
            ckpt = model.load_default_weights()

        # END IF

        ######################################################################################################################

        # Initialize model variables
        istraining = False
        global_step = tf.Variable(gs_init, name='global_step', trainable=False)
        number_of_videos = tf.Variable(num_vids,
                                       name='number_of_videos',
                                       trainable=False)
        video_step = tf.Variable(1.0, name='video_step', trainable=False)

        # TF session setup
        config = tf.ConfigProto(allow_soft_placement=True)
        sess = tf.Session(config=config)
        init = tf.global_variables_initializer()

        # Variables get randomly initialized into tf graph
        sess.run(init)

        data_path = os.path.join(base_data_path, 'tfrecords_' + dataset,
                                 'Split' + str(split), f_name)

        # Setting up tensors for models
        # input_data_tensor - [batchSize, inputDims, height, width, channels]
        input_data_tensor, labels_tensor, names_tensor = load_dataset(
            model,
            1,
            batch_size,
            output_dims,
            input_dims,
            seq_length,
            size,
            data_path,
            dataset,
            istraining,
            clip_length,
            video_offset,
            clip_offset,
            num_clips,
            clip_stride,
            video_step,
            preproc_debugging,
            0,
            verbose,
            reverse=reverse)

        ######### GPU list check block ####################

        assert (len(gpu_list) <= 1)

        if len(gpu_list) == 0:
            gpu_list = ['0']  # Default choice is ID = 0

        # END IF

        ###################################################

        ################################################## Setup TF graph block ######################################################

        # Model Inference
        with tf.device('/gpu:' + gpu_list[0]):
            logits = model.inference(
                input_data_tensor[0:batch_size, :, :, :, :],
                istraining,
                input_dims,
                output_dims,
                seq_length,
                scope,
                return_layer=return_layer)[0]

            # Logits should have shape [batchSize, seqLength, outputDims]; if not, reshape
            logits_shape = logits.get_shape().as_list()
            if (logits_shape[0] != batch_size or logits_shape[1] != seq_length
                    or logits_shape[2] != output_dims
                ) and return_layer[0] == 'logits':
                logits = tf.reshape(logits,
                                    [batch_size, seq_length, output_dims])

            # END IF

            if use_softmax:
                logits = tf.nn.softmax(logits)

            # END IF

        # END WITH

        ############################################################################################################################################

        if save_bool:
            ######################### Logger Setup block ######################################

            # Logger setup (Name format: Date, month, hour, minute and second, with a prefix of exp_test)
            log_name = ("exp_test_%s_%s_%s_%s_%s" %
                        (time.strftime("%d_%m_%H_%M_%S"), dataset,
                         preproc_method, experiment_name, metrics_method))

            curr_logger = Logger(
                os.path.join('logs', model.name, dataset, preproc_method,
                             metrics_dir, log_name))
            make_dir(os.path.join('results', model.name))
            make_dir(os.path.join('results', model.name, dataset))
            make_dir(
                os.path.join('results', model.name, dataset, preproc_method))
            make_dir(
                os.path.join('results', model.name, dataset, preproc_method,
                             experiment_name))
            make_dir(
                os.path.join('results', model.name, dataset, preproc_method,
                             experiment_name, metrics_dir))

            ###################################################################################

        # Initializer and queue-runner setup (the session was created above)
        init = (tf.global_variables_initializer(),
                tf.local_variables_initializer())
        coord = tf.train.Coordinator()
        threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
        if save_bool:
            metrics = Metrics(output_dims,
                              seq_length,
                              curr_logger,
                              metrics_method,
                              istraining,
                              model.name,
                              experiment_name,
                              preproc_method,
                              dataset,
                              metrics_dir,
                              verbose=verbose,
                              topk=topk)

        # Variables get randomly initialized into tf graph
        sess.run(init)

        # Check that weights were loaded or random initializations are requested
        if ckpt is None or random_init:
            print("Caution: Model weights are not being loaded; using random initialization.")

        else:
            # Model variables initialized from previous saved models
            initialize_from_dict(sess, ckpt, model.name)

        # END IF

        del ckpt

        acc = 0
        count = 0
        videos_loaded = 0
        previous_vid_name = ''
        total_pred = []

        if verbose:
            print "Begin Testing"

        # END IF

        ########################################## Testing loop block ################################################################

        while videos_loaded <= num_vids:
            output_predictions, labels, names = sess.run(
                [logits, labels_tensor, names_tensor])

            if avg_clips:
                output_predictions = np.array([np.mean(output_predictions, 0)])
                names = names[:1]

            for batch_idx in range(len(names)):
                vid_name = names[batch_idx]
                if vid_name != previous_vid_name:
                    previous_vid_name = vid_name
                    videos_loaded += 1
                    if verbose:
                        print "Number of videos loaded: ", videos_loaded

                # Extract remaining clips from currently loaded video, once it finishes exit while loop
                if videos_loaded > num_vids:
                    break

                count += 1

                if save_bool:
                    metrics.log_prediction(labels[batch_idx][0],
                                           output_predictions[batch_idx],
                                           vid_name, count)

            # END FOR

        # END WHILE

        #########################################################################################################################################################

    # END WITH

    coord.request_stop()
    coord.join(threads)

    if save_bool:
        total_accuracy = metrics.total_classification()
        total_pred = metrics.get_predictions_array()

        if verbose:
            print "Total accuracy : ", total_accuracy
            print total_pred

        # Save results in numpy format
        np.save(
            os.path.join(
                'results', model.name, dataset, preproc_method,
                experiment_name, metrics_dir,
                'test_predictions_' + dataset + "_" + metrics_method + '.npy'),
            np.array(total_pred))
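
Hedged usage sketch: reading back the predictions array that test() saves. Every name below is a placeholder for the corresponding argument passed to test():

import os
import numpy as np

model_name, dataset = 'my_model', 'HMDB51'            # placeholders
preproc_method, experiment_name = 'default', 'exp1'   # placeholders
metrics_dir, metrics_method = 'metrics', 'default'    # placeholders

preds = np.load(os.path.join(
    'results', model_name, dataset, preproc_method, experiment_name,
    metrics_dir,
    'test_predictions_' + dataset + '_' + metrics_method + '.npy'))
print(preds.shape)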
Ejemplo n.º 41
0
def example_video(dataset, num_vids, split, base_data_path, f_name, vid_name, verbose):
    """
    Function used to load videos from a chosen dataset and step through them without any preprocessing, for inspection
    Args:
        :dataset:            Name of dataset being loaded
        :num_vids:           Number of videos to be used for training
        :split:              Split of dataset being used
        :base_data_path:     Full path to root directory containing datasets
        :f_name:             Specific video directory within a chosen split of a dataset
        :vid_name:           Name of video to load if desired
        :verbose:            Boolean to indicate if all print statements should be processed or not

    Returns:
        Does not return anything
    """

    with tf.name_scope("my_scope") as scope:

        # Initialize model variables
        istraining       = False


        data_path   = os.path.join(base_data_path, 'tfrecords_'+dataset, 'Split'+str(split), f_name)

        # Setting up tensors for models
        input_data_tensor, labels_tensor, names_tensor = load_dataset_without_preprocessing(data_path, dataset, istraining, vid_name, verbose)

        # TF session setup
        config  = tf.ConfigProto(allow_soft_placement=True)
        sess    = tf.Session(config=config)
        init    = (tf.global_variables_initializer(), tf.local_variables_initializer())
        coord   = tf.train.Coordinator()
        threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)

        # Variables get randomly initialized into tf graph
        sess.run(init)


        acc               = 0
        count             = 0
        videos_loaded     = 0
        previous_vid_name = ''
        total_pred        = []

        if verbose:
            print "Begin Testing"

        # END IF

        ########################################## Testing loop block ################################################################

        while videos_loaded <= num_vids:
            output, labels, names = sess.run([input_data_tensor, labels_tensor, names_tensor])

            # Drop into the debugger to manually inspect the loaded batch
            import pdb; pdb.set_trace()

            for batch_idx in range(len(names)):
                vid_name = names[batch_idx]
                if vid_name != previous_vid_name:
                    previous_vid_name = vid_name
                    videos_loaded += 1
                    if verbose:
                        print "Number of videos loaded: ", videos_loaded


                # Extract remaining clips from currently loaded video, once it finishes exit while loop
                if videos_loaded > num_vids:
                    break

                count += 1


            # END FOR

        # END WHILE

        #########################################################################################################################################################

    # END WITH

    coord.request_stop()
    coord.join(threads)
Ejemplo n.º 42
0
    def test_keyed_features_filter(self):
        gfile.Glob = self._orig_glob
        lines = [
            '{"features": {"feature": {"age": {"int64_list": {"value": [2]}}}}}',
            '{"features": {"feature": {"age": {"int64_list": {"value": [0]}}}}}',
            '{"features": {"feature": {"age": {"int64_list": {"value": [1]}}}}}',
            '{"features": {"feature": {"age": {"int64_list": {"value": [0]}}}}}',
            '{"features": {"feature": {"age": {"int64_list": {"value": [3]}}}}}',
            '{"features": {"feature": {"age": {"int64_list": {"value": [5]}}}}}'
        ]
        filename = self._create_temp_file("\n".join(lines))

        batch_size = 2
        queue_capacity = 4
        name = "my_batch"
        features = {"age": parsing_ops.FixedLenFeature([], dtypes_lib.int64)}

        def filter_fn(keys, examples_json):
            del keys
            serialized = parsing_ops.decode_json_example(examples_json)
            examples = parsing_ops.parse_example(serialized, features)
            return math_ops.less(examples["age"], 2)

        with ops.Graph().as_default() as g, self.test_session(
                graph=g) as session:
            keys, inputs = graph_io._read_keyed_batch_examples_helper(
                filename,
                batch_size,
                reader=io_ops.TextLineReader,
                randomize_input=False,
                num_epochs=1,
                read_batch_size=batch_size,
                queue_capacity=queue_capacity,
                filter_fn=filter_fn,
                name=name)
            self.assertAllEqual((None, ), keys.get_shape().as_list())
            self.assertAllEqual((None, ), inputs.get_shape().as_list())
            session.run(variables.local_variables_initializer())

            coord = coordinator.Coordinator()
            threads = queue_runner_impl.start_queue_runners(session,
                                                            coord=coord)
            # First batch of two filtered examples.
            out_keys, out_vals = session.run((keys, inputs))
            self.assertAllEqual([
                filename.encode("utf-8") + b":2",
                filename.encode("utf-8") + b":3"
            ], out_keys)
            self.assertAllEqual(
                [lines[1].encode("utf-8"), lines[2].encode("utf-8")], out_vals)

            # Second batch will only have one filtered example as that's the only
            # remaining example that satisfies the filtering criterion.
            out_keys, out_vals = session.run((keys, inputs))
            self.assertAllEqual([filename.encode("utf-8") + b":4"], out_keys)
            self.assertAllEqual([lines[3].encode("utf-8")], out_vals)

            # Exhausted input.
            with self.assertRaises(errors.OutOfRangeError):
                session.run((keys, inputs))

            coord.request_stop()
            coord.join(threads)