Ejemplo n.º 1
0
    def input_fn():
        """Feed the arrays held in `data` through a queue and return batches.

        The feature, length and label-weight arrays under `data['data']` are
        enqueued together with the labels (when present) and dequeued in
        batches of `data['batch_size']`.

        Returns:
            `(features, target)` when labels are not None, where `features`
            is a dict keyed by 'INPUT', 'INPUTLEN' and 'LBLWEIGHTS';
            otherwise only the `features` dict.
        """
        inputs = {
            'INPUT': data['data']['features'],
            'INPUTLEN': data['data']['lengths'],
            'LBLWEIGHTS': data['data']['lbl_weights'],
        }
        labels = data['data']['labels']

        # Sorting by key yields a separate mapping with a stable column order.
        columns = collections.OrderedDict(
            sorted(inputs.items(), key=lambda item: item[0]))

        label_key = _get_unique_target_key(columns)
        has_labels = labels is not None
        if has_labels:
            columns[label_key] = labels

        shuffle_flag = hypers.get_param('th')
        batch_size = data['batch_size']
        ts_values = hypers.get_param('ts')
        epoch_count = ts_values[hypers.get_param('ti')]
        queue = feeding_functions._enqueue_data(  # pylint: disable=protected-access
            columns,
            1000,
            shuffle=shuffle_flag,
            num_threads=1,
            enqueue_size=batch_size,
            num_epochs=epoch_count)

        tensors = queue.dequeue_many(data['batch_size'])
        # The leading dequeued `Tensor` is the row number; discard it.
        if len(tensors) > 0:
            del tensors[0]

        features = dict(zip(columns, tensors))
        if not has_labels:
            return features

        target = features.pop(label_key)
        return features, target
 def testPandasFeeding(self):
   """Check that a DataFrame is fed through the queue in sequential row order.

   Each dequeued batch is expected to hold the DataFrame index values first,
   followed by one entry per column, with rows wrapping around once the end
   of the frame is reached. Returns early when pandas is not available.
   """
   if not HAS_PANDAS:
     return
   with ops.Graph().as_default():
     array1 = np.arange(32)
     array2 = np.arange(32, 64)
     df = pd.DataFrame({"a": array1, "b": array2}, index=np.arange(64, 96))
     q = ff._enqueue_data(df, capacity=100)
     batch_size = 5
     # Dequeue exactly `batch_size` rows so the op always agrees with the
     # expected indices computed from `batch_size` below.
     dq_op = q.dequeue_many(batch_size)
     with session.Session() as sess:
       coord = coordinator.Coordinator()
       threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
       for i in range(100):
         # Positions of the rows expected in batch `i`, modulo the row count.
         indices = [
             j % array1.shape[0]
             for j in range(batch_size * i, batch_size * (i + 1))
         ]
         expected_df_indices = df.index[indices]
         expected_rows = df.iloc[indices]
         dq = sess.run(dq_op)
         # dq[0] carries the index values; the columns follow in order.
         np.testing.assert_array_equal(expected_df_indices, dq[0])
         for col_num, col in enumerate(df.columns):
           np.testing.assert_array_equal(expected_rows[col].values,
                                         dq[col_num + 1])
       coord.request_stop()
       coord.join(threads)
Ejemplo n.º 3
0
 def input_fn():
   """Queue the DataFrame `x` and return batched column tensors.

   Returns:
     A dict mapping each column of `x` to its dequeued tensor. When `y` is
     not None, a `(features, target)` tuple instead; `target` is a dict when
     `target_column` is a list and a single tensor otherwise.
   """
   queue = feeding_functions._enqueue_data(  # pylint: disable=protected-access
       x,
       queue_capacity,
       shuffle=shuffle,
       min_after_dequeue=min_after_dequeue,
       num_threads=num_threads,
       enqueue_size=batch_size,
       num_epochs=num_epochs)

   # `dequeue_many` when epochs are unbounded, `dequeue_up_to` otherwise.
   dequeue = queue.dequeue_many if num_epochs is None else queue.dequeue_up_to
   batch = dequeue(batch_size)

   assert len(batch) == len(x.columns) + 1, ('Features should have one '
                                             'extra element for the index.')
   # Skip the leading index element and pair the rest with the column names.
   features = dict(zip(list(x.columns), batch[1:]))

   if y is None:
     return features

   if not isinstance(target_column, list):
     return features, features.pop(target_column)

   target_names = [name for name, _ in y_columns]
   target_tensors = [features.pop(column) for column in target_column]
   return features, dict(zip(target_names, target_tensors))
Ejemplo n.º 4
0
 def input_fn():
     """Return batched tensors for the columns of the DataFrame `x`.

     Returns:
         The features dict keyed by the column names of `x`, or, when `y` is
         not None, a `(features, target)` tuple in which the target columns
         have been removed from `features`.
     """
     queue = feeding_functions._enqueue_data(  # pylint: disable=protected-access
         x,
         queue_capacity,
         shuffle=shuffle,
         min_after_dequeue=min_after_dequeue,
         num_threads=num_threads,
         enqueue_size=batch_size,
         num_epochs=num_epochs)

     dequeued = (queue.dequeue_many(batch_size)
                 if num_epochs is None else queue.dequeue_up_to(batch_size))
     assert len(dequeued) == len(x.columns) + 1, (
         'Features should have one '
         'extra element for the index.')

     # Element 0 is the index; the remaining elements line up with x.columns.
     column_tensors = dequeued[1:]
     features = dict(zip(list(x.columns), column_tensors))
     if y is None:
         return features

     if isinstance(target_column, list):
         names = [name for name, _ in y_columns]
         tensors = [features.pop(column) for column in target_column]
         target = dict(zip(names, tensors))
     else:
         target = features.pop(target_column)
     return features, target
 def testArrayFeedingMultiThread(self):
   """Rows dequeued from a shuffled, 8-thread array queue match their indices."""
   with ops.Graph().as_default():
     source = np.arange(256).reshape([128, 2])
     queue = ff._enqueue_data(
         source, capacity=128, num_threads=8, shuffle=True)
     batch_size = 3
     dequeue_op = queue.dequeue_many(batch_size)
     with session.Session() as sess:
       coord = coordinator.Coordinator()
       threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
       for _ in range(100):
         batch = sess.run(dequeue_op)
         # batch[0] holds the row indices, batch[1] the rows themselves.
         row_indices = batch[0]
         np.testing.assert_array_equal(get_rows(source, row_indices), batch[1])
       coord.request_stop()
       coord.join(threads)
Ejemplo n.º 6
0
    def input_fn():
        """Enqueue the numpy dict `x` (plus `y`) and return dequeued batches.

        Returns:
            A dict mapping every key of `x` to its dequeued tensor, or a
            `(features, target)` tuple when `y` is not None.

        Raises:
            TypeError: if `x` is not a dict.
            ValueError: if the arrays in `x` and `y` do not all share the
                same first dimension.
        """
        if not isinstance(x, dict):
            raise TypeError('x must be dict; got {}'.format(type(x).__name__))

        # Sorted copy of `x`: fixes the iteration order and lets the target be
        # added without touching the caller's dict.
        columns = collections.OrderedDict(
            sorted(x.items(), key=lambda item: item[0]))

        target_key = _get_unique_target_key(columns)
        has_target = y is not None
        if has_target:
            columns[target_key] = y

        leading_dims = {array.shape[0] for array in columns.values()}
        if len(leading_dims) != 1:
            shapes_by_key = {key: array.shape for key, array in columns.items()}
            target_shape = y.shape if has_target else None
            message = ('Length of tensors in x and y is mismatched. All '
                       'elements in x and y must have the same length.\n'
                       'Shapes in x: {}\n'
                       'Shape for y: {}\n')
            raise ValueError(message.format(shapes_by_key, target_shape))

        queue = feeding_functions._enqueue_data(  # pylint: disable=protected-access
            columns,
            queue_capacity,
            shuffle=shuffle,
            seed=seed,
            num_threads=num_threads,
            enqueue_size=batch_size,
            num_epochs=num_epochs)

        if num_epochs is None:
            batch = queue.dequeue_many(batch_size)
        else:
            batch = queue.dequeue_up_to(batch_size)

        # The first dequeued `Tensor` is the row number, not a data column.
        if len(batch) > 0:
            del batch[0]

        features = dict(zip(columns, batch))
        if not has_target:
            return features

        target = features.pop(target_key)
        return features, target
Ejemplo n.º 7
0
  def input_fn():
    """Turn the numpy dict `x` (and optional `y`) into queue-fed batches.

    Returns:
      The features dict keyed like `x`; when `y` is not None, a
      `(features, target)` tuple.

    Raises:
      TypeError: if `x` is not a dict.
      ValueError: if the first dimensions of the arrays in `x` and `y` are
        not all equal.
    """
    if not isinstance(x, dict):
      raise TypeError('x must be dict; got {}'.format(type(x).__name__))

    # Work on a key-sorted copy so the column order is deterministic.
    sorted_items = sorted(x.items(), key=lambda pair: pair[0])
    data_by_key = collections.OrderedDict(sorted_items)

    target_key = _get_unique_target_key(data_by_key)
    if y is not None:
      data_by_key[target_key] = y

    row_counts = set(value.shape[0] for value in data_by_key.values())
    if len(row_counts) != 1:
      shapes = {key: value.shape for key, value in data_by_key.items()}
      y_shape = y.shape if y is not None else None
      raise ValueError(
          'Length of tensors in x and y is mismatched. All '
          'elements in x and y must have the same length.\n'
          'Shapes in x: {}\n'
          'Shape for y: {}\n'.format(shapes, y_shape))

    queue = feeding_functions._enqueue_data(  # pylint: disable=protected-access
        data_by_key,
        queue_capacity,
        shuffle=shuffle,
        num_threads=num_threads,
        enqueue_size=batch_size,
        num_epochs=num_epochs)

    if num_epochs is None:
      dequeued = queue.dequeue_many(batch_size)
    else:
      dequeued = queue.dequeue_up_to(batch_size)

    # Drop the leading `Tensor`, which is the row number.
    if len(dequeued) > 0:
      del dequeued[0]

    features = dict(zip(data_by_key, dequeued))
    if y is None:
      return features
    return features, features.pop(target_key)
 def testArrayFeeding(self):
   """Array rows come out of the queue in sequential order, wrapping around."""
   with ops.Graph().as_default():
     source = np.arange(32).reshape([16, 2])
     queue = ff._enqueue_data(source, capacity=100)
     batch_size = 3
     dequeue_op = queue.dequeue_many(batch_size)
     num_rows = source.shape[0]
     with session.Session() as sess:
       coord = coordinator.Coordinator()
       threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
       for step in range(100):
         first = batch_size * step
         # Row positions expected in this batch, modulo the number of rows.
         expected_indices = [
             position % num_rows
             for position in range(first, first + batch_size)
         ]
         expected_rows = get_rows(source, expected_indices)
         batch = sess.run(dequeue_op)
         np.testing.assert_array_equal(expected_indices, batch[0])
         np.testing.assert_array_equal(expected_rows, batch[1])
       coord.request_stop()
       coord.join(threads)
Ejemplo n.º 9
0
        def input_fn():
            """Build shuffled batches of color names with RGB labels from `df`.

            Returns:
                A `(features, label)` tuple: `features` holds the entries
                under COLOR_NAME_KEY and SEQUENCE_LENGTH_KEY, and `label` is
                the entry that was stored under RGB_KEY.
            """
            # Several enqueue threads feed the queue so the input scales.
            frame_queue = ff._enqueue_data(
                df,
                capacity=1024,
                shuffle=True,
                min_after_dequeue=256,
                num_threads=4,
                enqueue_size=16,
                num_epochs=epochs)

            dequeued = frame_queue.dequeue_up_to(batch_size)
            # The first element is discarded; the rest are the frame columns.
            _, color_name, r, g, b, seq_len = dequeued

            # Break every name into its individual characters.
            name_chars = tf.string_split(color_name, delimiter='')
            # Constant holding the char -> index mapping; it is needed to
            # build the sparse tensor that is one-hot encoded later on.
            char_mapping = tf.constant(CHARACTERS, name="mapping")
            # Names expressed as indices in a sparse tensor.
            name_indices = _sparse_string_to_index(name_chars, char_mapping)

            # Stack the channels along axis 1 and scale them by 1/255.
            stacked_channels = tf.stack([r, g, b], axis=1)
            rgb = tf.to_float(stacked_channels) / 255.0

            # Produce the shuffled batches.
            tensors = {
                COLOR_NAME_KEY: name_indices,
                SEQUENCE_LENGTH_KEY: seq_len,
                RGB_KEY: rgb,
            }
            batched = tf.train.shuffle_batch(
                tensors,
                batch_size,
                min_after_dequeue=100,
                num_threads=4,
                capacity=1000,
                enqueue_many=True,
                allow_smaller_final_batch=True)
            label = batched.pop(RGB_KEY)
            return batched, label
 def testPandasFeedingMultiThread(self):
   """Shuffled, 8-thread DataFrame feeding yields rows matching their indices.

   Returns early when pandas is not available.
   """
   if not HAS_PANDAS:
     return
   with ops.Graph().as_default():
     first_column = np.arange(128, 256)
     second_column = 2 * first_column
     frame = pd.DataFrame(
         {"a": first_column, "b": second_column}, index=np.arange(128))
     queue = ff._enqueue_data(frame, capacity=128, num_threads=8, shuffle=True)
     batch_size = 5
     dequeue_op = queue.dequeue_many(batch_size)
     with session.Session() as sess:
       coord = coordinator.Coordinator()
       threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
       for _ in range(100):
         batch = sess.run(dequeue_op)
         # batch[0] holds the index values; the columns follow in order.
         expected = frame.iloc[batch[0]]
         for position, column in enumerate(frame.columns):
           np.testing.assert_array_equal(expected[column].values,
                                         batch[position + 1])
       coord.request_stop()
       coord.join(threads)
Ejemplo n.º 11
0
    def input_fn():
        """Enqueue the arrays of `x` and `y` and split batches back apart.

        Returns:
            A `(features, target)` tuple of dicts; `target` holds the
            dequeued tensors for the keys of `y`, `features` the rest.
        """
        columns = collections.OrderedDict(
            sorted(x.items(), key=lambda item: item[0]))

        # Remember which keys came from `y` so they can be split off later.
        target_keys = list(y)
        for key in target_keys:
            columns[key] = y[key]

        queue = feeding_functions._enqueue_data(  # pylint: disable=protected-access
            columns,
            queue_capacity,
            shuffle=shuffle,
            num_threads=num_threads,
            enqueue_size=batch_size,
            num_epochs=num_epochs)

        if num_epochs is None:
            batch = queue.dequeue_many(batch_size)
        else:
            batch = queue.dequeue_up_to(batch_size)

        # Discard the leading element before pairing tensors with the keys.
        if batch:
            del batch[0]

        features = dict(zip(columns, batch))
        target = {key: features.pop(key) for key in target_keys}
        return features, target
Ejemplo n.º 12
0
    def input_fn():
        """Queue `x` together with the entries of `y` and return batches.

        Returns:
            `(features, target)`, both dicts: `target` contains one dequeued
            tensor per key of `y`, and `features` contains the remainder.
        """
        sorted_features = sorted(x.items(), key=lambda pair: pair[0])
        merged = collections.OrderedDict(sorted_features)

        label_names = []
        for name in y:
            merged[name] = y[name]
            label_names.append(name)

        queue = feeding_functions._enqueue_data(  # pylint: disable=protected-access
            merged,
            queue_capacity,
            shuffle=shuffle,
            num_threads=num_threads,
            enqueue_size=batch_size,
            num_epochs=num_epochs)

        dequeue = (queue.dequeue_many
                   if num_epochs is None else queue.dequeue_up_to)
        tensors = dequeue(batch_size)

        # The first element is dropped; the rest line up with `merged`.
        if tensors:
            tensors.pop(0)

        features = dict(zip(merged.keys(), tensors))
        target = dict((name, features.pop(name)) for name in label_names)
        return features, target
Ejemplo n.º 13
0
	def input_fn():
		"""Enqueue `x` (and `y`) and return batches passed through `read_images`.

		Returns:
			`read_images(features)` when `y` is None, otherwise a
			`(read_images(features), target)` tuple.

		Raises:
			ValueError: if the arrays in `x` and `y` do not all share the same
				first dimension.
		"""
		# Make a shallow, key-sorted copy so the iteration order is consistent.
		ordered_dict_x = collections.OrderedDict(
				sorted(x.items(), key=lambda t: t[0]))

		unique_target_key = _get_unique_target_key(ordered_dict_x)
		if y is not None:
			ordered_dict_x[unique_target_key] = y

		if len(set(v.shape[0] for v in ordered_dict_x.values())) != 1:
			shape_dict_of_x = {k: ordered_dict_x[k].shape
					for k in ordered_dict_x.keys()}
			shape_of_y = None if y is None else y.shape
			raise ValueError('Length of tensors in x and y is mismatched. All '
					'elements in x and y must have the same length.\n'
					'Shapes in x: {}\n'
					'Shape for y: {}\n'.format(shape_dict_of_x, shape_of_y))

		# Debug dump of the data about to be enqueued. Written in call form so
		# the line parses under Python 3 as well as Python 2.
		print(ordered_dict_x)
		queue = feeding_functions._enqueue_data(	# pylint: disable=protected-access
				ordered_dict_x,
				queue_capacity,
				shuffle=shuffle,
				num_threads=num_threads,
				enqueue_size=batch_size,
				num_epochs=num_epochs)

		features = (queue.dequeue_many(batch_size) if num_epochs is None
				else queue.dequeue_up_to(batch_size))
		# Drop the first dequeued element before pairing tensors with the keys.
		if len(features) > 0:
			features.pop(0)

		features = dict(zip(ordered_dict_x.keys(), features))
		if y is not None:
			target = features.pop(unique_target_key)
			return read_images(features), target
		return read_images(features)
Ejemplo n.º 14
0
  def input_fn():
    """Enqueue the numpy dict `x` with an optional array or dict `y`.

    Returns:
      The features dict when `y` is None; otherwise `(features, target)`,
      where `target` is a dict when `y` is a dict and a single tensor
      otherwise.

    Raises:
      TypeError: if `x` is not a dict.
      ValueError: if `x` is empty, `y` is an empty dict, `x` and `y` share
        keys, or the arrays do not all have the same first dimension.
    """
    if not isinstance(x, dict):
      raise TypeError('x must be dict; got {}'.format(type(x).__name__))
    if not x:
      raise ValueError('x cannot be empty')

    # Key-sorted copy of `x`, so the iteration order is consistent.
    columns = collections.OrderedDict(
        sorted(x.items(), key=lambda item: item[0]))
    # Snapshot the feature keys as a list before any targets are merged in.
    feature_keys = list(columns)

    target_keys = None
    if isinstance(y, dict):
      if not y:
        raise ValueError('y cannot be empty dict, use None instead.')

      sorted_targets = collections.OrderedDict(
          sorted(y.items(), key=lambda item: item[0]))
      target_keys = list(sorted_targets)

      shared_keys = set(feature_keys) & set(target_keys)
      if shared_keys:
        raise ValueError('{} duplicate keys are found in both x and y: '
                         '{}'.format(len(shared_keys), shared_keys))

      columns.update(sorted_targets)
    elif y is not None:
      target_keys = _get_unique_target_key(columns)
      columns[target_keys] = y

    leading_dims = {array.shape[0] for array in columns.values()}
    if len(leading_dims) != 1:
      feature_shapes = {key: columns[key].shape for key in feature_keys}

      if target_keys is None:
        target_shapes = None
      elif isinstance(target_keys, string_types):
        target_shapes = y.shape
      else:
        target_shapes = {key: columns[key].shape for key in target_keys}

      message = ('Length of tensors in x and y is mismatched. All '
                 'elements in x and y must have the same length.\n'
                 'Shapes in x: {}\n'
                 'Shapes in y: {}\n')
      raise ValueError(message.format(feature_shapes, target_shapes))

    queue = feeding_functions._enqueue_data(  # pylint: disable=protected-access
        columns,
        queue_capacity,
        shuffle=shuffle,
        num_threads=num_threads,
        enqueue_size=batch_size,
        num_epochs=num_epochs)

    if num_epochs is None:
      batch = queue.dequeue_many(batch_size)
    else:
      batch = queue.dequeue_up_to(batch_size)

    # The first `Tensor` in `batch` is the row number; remove it.
    if len(batch) > 0:
      del batch[0]

    num_features = len(feature_keys)
    features = dict(zip(feature_keys, batch[:num_features]))
    if target_keys is None:
      # TODO(martinwicke), return consistent result
      return features
    if isinstance(target_keys, string_types):
      return features, batch[-1]
    return features, dict(zip(target_keys, batch[-len(target_keys):]))
Ejemplo n.º 15
0
def enqueue_data(*args, **kwargs):
  """Forward all positional and keyword arguments to `_enqueue_data`.

  Returns:
    Whatever `_enqueue_data` returns for the given arguments.
  """
  return _enqueue_data(*args, **kwargs)
Ejemplo n.º 16
0
  def input_fn():
    """Enqueue `x` (dict or array) with an optional `y` and return batches.

    Returns:
      The features alone when `y` is None, otherwise `(features, target)`.
      `features` is a single tensor when `x` is an `np.ndarray` and a dict
      otherwise; `target` is a dict when `y` is a dict and a single tensor
      otherwise.

    Raises:
      ValueError: if `y` is an empty dict, `x` and `y` share keys, or the
        arrays do not all have the same first dimension.
    """
    # Only `columns` is used for the data from here on, because `x` itself
    # may be either a dict or an array.
    columns = _validate_and_convert_features(x)

    # Snapshot the feature keys as a list before any targets are merged in.
    feature_keys = list(columns)

    target_keys = None
    if isinstance(y, dict):
      if not y:
        raise ValueError('y cannot be empty dict, use None instead.')

      sorted_targets = collections.OrderedDict(
          sorted(y.items(), key=lambda item: item[0]))
      target_keys = list(sorted_targets)

      shared_keys = set(feature_keys) & set(target_keys)
      if shared_keys:
        raise ValueError('{} duplicate keys are found in both x and y: '
                         '{}'.format(len(shared_keys), shared_keys))

      columns.update(sorted_targets)
    elif y is not None:
      target_keys = _get_unique_target_key(columns)
      columns[target_keys] = y

    leading_dims = {array.shape[0] for array in columns.values()}
    if len(leading_dims) != 1:
      feature_shapes = {key: columns[key].shape for key in feature_keys}

      if target_keys is None:
        target_shapes = None
      elif isinstance(target_keys, string_types):
        target_shapes = y.shape
      else:
        target_shapes = {key: columns[key].shape for key in target_keys}

      message = ('Length of tensors in x and y is mismatched. All '
                 'elements in x and y must have the same length.\n'
                 'Shapes in x: {}\n'
                 'Shapes in y: {}\n')
      raise ValueError(message.format(feature_shapes, target_shapes))

    queue = feeding_functions._enqueue_data(  # pylint: disable=protected-access
        columns,
        queue_capacity,
        shuffle=shuffle,
        num_threads=num_threads,
        enqueue_size=batch_size,
        num_epochs=num_epochs)

    if num_epochs is None:
      batch = queue.dequeue_many(batch_size)
    else:
      batch = queue.dequeue_up_to(batch_size)

    # The first `Tensor` in `batch` is the row number; remove it.
    if batch:
      del batch[0]

    if isinstance(x, np.ndarray):
      # An array input gets a single tensor back rather than a dict.
      features = batch[0]
    else:
      # A dict input gets a dict back, keyed like the original.
      features = dict(zip(feature_keys, batch[:len(feature_keys)]))

    if target_keys is None:
      # TODO(martinwicke), return consistent result
      return features
    if isinstance(target_keys, string_types):
      return features, batch[-1]
    return features, dict(zip(target_keys, batch[-len(target_keys):]))
Ejemplo n.º 17
0
    def input_fn():
        """Enqueue the numpy dict `x` with an optional array or dict `y`.

        Returns:
            The features dict when `y` is None; otherwise a
            `(features, target)` tuple, where `target` is a dict when `y` is
            a dict and a single tensor otherwise.

        Raises:
            TypeError: if `x` is not a dict.
            ValueError: if `x` is empty, `y` is an empty dict, `x` and `y`
                share keys, or the arrays do not all have the same first
                dimension.
        """
        if not isinstance(x, dict):
            raise TypeError('x must be dict; got {}'.format(type(x).__name__))
        if not x:
            raise ValueError('x cannot be empty')

        # Key-sorted copy of `x`, so the iteration order is consistent.
        sorted_features = sorted(x.items(), key=lambda pair: pair[0])
        merged = collections.OrderedDict(sorted_features)
        # Materialize the keys now; targets are merged into `merged` below.
        feature_keys = list(merged.keys())

        target_keys = None
        if isinstance(y, dict):
            if not y:
                raise ValueError('y cannot be empty dict, use None instead.')

            sorted_targets = sorted(y.items(), key=lambda pair: pair[0])
            targets = collections.OrderedDict(sorted_targets)
            target_keys = list(targets.keys())

            overlap = set(feature_keys).intersection(target_keys)
            if overlap:
                raise ValueError(
                    '{} duplicate keys are found in both x and y: '
                    '{}'.format(len(overlap), overlap))

            merged.update(targets)
        elif y is not None:
            target_keys = _get_unique_target_key(merged)
            merged[target_keys] = y

        row_counts = set(value.shape[0] for value in merged.values())
        if len(row_counts) != 1:
            x_shapes = {key: merged[key].shape for key in feature_keys}

            if target_keys is None:
                y_shapes = None
            elif isinstance(target_keys, string_types):
                y_shapes = y.shape
            else:
                y_shapes = {key: merged[key].shape for key in target_keys}

            raise ValueError(
                'Length of tensors in x and y is mismatched. All '
                'elements in x and y must have the same length.\n'
                'Shapes in x: {}\n'
                'Shapes in y: {}\n'.format(x_shapes, y_shapes))

        queue = feeding_functions._enqueue_data(  # pylint: disable=protected-access
            merged,
            queue_capacity,
            shuffle=shuffle,
            num_threads=num_threads,
            enqueue_size=batch_size,
            num_epochs=num_epochs)

        dequeue = (queue.dequeue_many
                   if num_epochs is None else queue.dequeue_up_to)
        batch = dequeue(batch_size)

        # The first `Tensor` in `batch` is the row number; remove it.
        if batch:
            del batch[0]

        feature_tensors = batch[:len(feature_keys)]
        features = dict(zip(feature_keys, feature_tensors))
        if target_keys is None:
            # TODO(martinwicke), return consistent result
            return features
        if isinstance(target_keys, string_types):
            return features, batch[-1]

        target_tensors = batch[-len(target_keys):]
        return features, dict(zip(target_keys, target_tensors))
Ejemplo n.º 18
0
def enqueue_data(*args, **kwargs):
    """Forward all positional and keyword arguments to `_enqueue_data`.

    Returns:
        Whatever `_enqueue_data` returns for the given arguments.
    """
    return _enqueue_data(*args, **kwargs)