예제 #1
0
 def testPandasFeeding(self):
     """Checks that a DataFrame is fed through the queue in row order.

     Dequeues 100 batches and asserts that each batch holds the next
     `batch_size` consecutive rows of the frame (wrapping around to the first
     row after the last one): element 0 of the dequeued result must equal the
     DataFrame index values and the following elements must equal the
     columns, in `df.columns` order.

     The test body is a no-op when pandas is unavailable.
     """
     if not HAS_PANDAS:
         return
     with ops.Graph().as_default():
         array1 = np.arange(32)
         array2 = np.arange(32, 64)
         df = pd.DataFrame({
             "a": array1,
             "b": array2
         },
                           index=np.arange(64, 96))
         q = ff._enqueue_data(df, capacity=100)
         batch_size = 5
         # Use `batch_size` (not a literal) so the dequeue op always agrees
         # with the expected-index computation in the loop below.
         dq_op = q.dequeue_many(batch_size)
         with session.Session() as sess:
             coord = coordinator.Coordinator()
             threads = queue_runner_impl.start_queue_runners(sess=sess,
                                                             coord=coord)
             for i in range(100):
                 # Row positions expected in the i-th batch; the modulo wraps
                 # around once the end of the frame is reached.
                 indices = [
                     j % array1.shape[0]
                     for j in range(batch_size * i, batch_size * (i + 1))
                 ]
                 expected_df_indices = df.index[indices]
                 expected_rows = df.iloc[indices]
                 dq = sess.run(dq_op)
                 # dq[0] carries the index; dq[1:] carry the columns.
                 np.testing.assert_array_equal(expected_df_indices, dq[0])
                 for col_num, col in enumerate(df.columns):
                     np.testing.assert_array_equal(
                         expected_rows[col].values, dq[col_num + 1])
             coord.request_stop()
             coord.join(threads)
예제 #2
0
 def input_fn():
     """Pandas input function.

     Enqueues `x`, dequeues one batch and maps the dequeued tensors back onto
     the column names of `x` (the leading element, which holds the index, is
     dropped).

     Returns:
       The features dict when `y` is None; otherwise a `(features, target)`
       tuple, where the target column(s) have been removed from the features.
       `target` is a dict when `target_column` is a list, else a single value.
     """
     feeding_queue = feeding_functions._enqueue_data(  # pylint: disable=protected-access
         x,
         queue_capacity,
         shuffle=shuffle,
         min_after_dequeue=min_after_dequeue,
         num_threads=num_threads,
         enqueue_size=batch_size,
         num_epochs=num_epochs)

     # With a bounded number of epochs `dequeue_up_to` is used instead of
     # `dequeue_many` (presumably so a short final batch is still returned).
     dequeue = (feeding_queue.dequeue_many
                if num_epochs is None else feeding_queue.dequeue_up_to)
     dequeued = dequeue(batch_size)

     assert len(dequeued) == len(x.columns) + 1, (
         'Features should have one '
         'extra element for the index.')

     # Skip the index element and key the remaining tensors by column name.
     features = dict(zip(list(x.columns), dequeued[1:]))

     if y is None:
         return features

     if isinstance(target_column, list):
         names = [name for name, _ in y_columns]
         tensors = [features.pop(column) for column in target_column]
         target = dict(zip(names, tensors))
     else:
         target = features.pop(target_column)
     return features, target
예제 #3
0
 def testArrayFeedingMultiThread(self):
     """Checks shuffled array feeding with 8 threads.

     Batch order is not asserted here; instead each dequeued batch of rows
     must match the rows of `array` selected by the dequeued indices.
     """
     with ops.Graph().as_default():
         batch_size = 3
         array = np.arange(256).reshape([128, 2])
         queue = ff._enqueue_data(
             array, capacity=128, num_threads=8, shuffle=True)
         dequeue_op = queue.dequeue_many(batch_size)
         with session.Session() as sess:
             coord = coordinator.Coordinator()
             threads = queue_runner_impl.start_queue_runners(
                 sess=sess, coord=coord)
             for _ in range(100):
                 dequeued = sess.run(dequeue_op)
                 # Element 0 holds the row indices, element 1 the row values.
                 row_ids = dequeued[0]
                 expected_rows = get_rows(array, row_ids)
                 np.testing.assert_array_equal(expected_rows, dequeued[1])
             coord.request_stop()
             coord.join(threads)
예제 #4
0
 def testArrayFeeding(self):
     """Checks that an array is fed through the queue in row order.

     Each of the 100 dequeued batches must contain the next `batch_size`
     consecutive row indices (wrapping around at the end of the array)
     together with the matching rows.
     """
     with ops.Graph().as_default():
         batch_size = 3
         array = np.arange(32).reshape([16, 2])
         num_rows = array.shape[0]
         queue = ff._enqueue_data(array, capacity=100)
         dequeue_op = queue.dequeue_many(batch_size)
         with session.Session() as sess:
             coord = coordinator.Coordinator()
             threads = queue_runner_impl.start_queue_runners(
                 sess=sess, coord=coord)
             for step in range(100):
                 first = batch_size * step
                 # Modulo wraps the running position back to row 0.
                 expected_ids = [
                     position % num_rows
                     for position in range(first, first + batch_size)
                 ]
                 expected_rows = get_rows(array, expected_ids)
                 dequeued = sess.run(dequeue_op)
                 np.testing.assert_array_equal(expected_ids, dequeued[0])
                 np.testing.assert_array_equal(expected_rows, dequeued[1])
             coord.request_stop()
             coord.join(threads)
예제 #5
0
 def testPandasFeedingMultiThread(self):
     """Checks shuffled DataFrame feeding with 8 threads.

     Batch order is not asserted; each dequeued column must equal the values
     of the DataFrame rows selected by the dequeued indices. The test body is
     a no-op when pandas is unavailable.
     """
     if not HAS_PANDAS:
         return
     with ops.Graph().as_default():
         batch_size = 5
         col_a = np.arange(128, 256)
         col_b = 2 * col_a
         df = pd.DataFrame({"a": col_a, "b": col_b}, index=np.arange(128))
         queue = ff._enqueue_data(
             df, capacity=128, num_threads=8, shuffle=True)
         dequeue_op = queue.dequeue_many(batch_size)
         with session.Session() as sess:
             coord = coordinator.Coordinator()
             threads = queue_runner_impl.start_queue_runners(
                 sess=sess, coord=coord)
             for _ in range(100):
                 dequeued = sess.run(dequeue_op)
                 # Element 0 holds the indices; columns follow from slot 1.
                 expected_rows = df.iloc[dequeued[0]]
                 for slot, column in enumerate(df.columns, start=1):
                     np.testing.assert_array_equal(
                         expected_rows[column].values, dequeued[slot])
             coord.request_stop()
             coord.join(threads)
예제 #6
0
    def input_fn():
        """Numpy input function.

        Merges `x` and `y` into a single ordered dict, verifies that every
        entry has the same first dimension, feeds the dict through a queue and
        splits one dequeued batch back into features and target.

        Returns:
          `features` alone when `y` is None, otherwise `(features, target)`.
          `features` is a single tensor when `x` is an `np.ndarray` and a dict
          keyed like `x` otherwise; `target` is a dict when `y` is a dict and
          a single tensor otherwise.

        Raises:
          ValueError: if `y` is an empty dict, if `x` and `y` share keys, or
            if the entries of `x` and `y` differ in their first dimension.
        """
        # `x` may be a dict or an array; from here on only the converted
        # ordered dict is used, apart from the final type check on `x`.
        data = _validate_and_convert_features(x)

        # Snapshot the keys as a list, since `keys()` is a view in Python 3.
        feature_keys = list(data.keys())

        if y is None:
            target_keys = None
        elif not isinstance(y, dict):
            # Single target: store it under a key that is unique within data.
            target_keys = _get_unique_target_key(data)
            data[target_keys] = y
        else:
            if not y:
                raise ValueError('y cannot be empty dict, use None instead.')

            sorted_y = collections.OrderedDict(
                sorted(y.items(), key=lambda item: item[0]))
            target_keys = list(sorted_y.keys())

            duplicate_keys = set(feature_keys) & set(target_keys)
            if duplicate_keys:
                raise ValueError(
                    '{} duplicate keys are found in both x and y: '
                    '{}'.format(len(duplicate_keys), duplicate_keys))

            data.update(sorted_y)

        # Every entry must agree on its first dimension.
        leading_dims = {value.shape[0] for value in data.values()}
        if len(leading_dims) != 1:
            shape_dict_of_x = {key: data[key].shape for key in feature_keys}

            if target_keys is None:
                shape_of_y = None
            elif isinstance(target_keys, string_types):
                shape_of_y = y.shape
            else:
                shape_of_y = {key: data[key].shape for key in target_keys}

            raise ValueError('Length of tensors in x and y is mismatched. All '
                             'elements in x and y must have the same length.\n'
                             'Shapes in x: {}\n'
                             'Shapes in y: {}\n'.format(
                                 shape_dict_of_x, shape_of_y))

        queue = feeding_functions._enqueue_data(  # pylint: disable=protected-access
            data,
            queue_capacity,
            shuffle=shuffle,
            num_threads=num_threads,
            enqueue_size=batch_size,
            num_epochs=num_epochs)

        if num_epochs is None:
            batch = queue.dequeue_many(batch_size)
        else:
            batch = queue.dequeue_up_to(batch_size)

        # The first `Tensor` in `batch` is the row number; drop it.
        if batch:
            batch.pop(0)

        if isinstance(x, np.ndarray):
            # Array in, single tensor out.
            features = batch[0]
        else:
            # Dict in, dict out: the feature tensors come first in the batch.
            features = dict(zip(feature_keys, batch[:len(feature_keys)]))

        if target_keys is None:
            # TODO(martinwicke), return consistent result
            return features
        if isinstance(target_keys, string_types):
            # Single target: it was added last, so it is the final tensor.
            return features, batch[-1]
        # Dict target: its tensors occupy the tail of the batch.
        return features, dict(zip(target_keys, batch[-len(target_keys):]))