Esempio n. 1
0
 def insert_transformed_feature(self, columns_to_tensors):
   """Stores the bucketized source column under this column's key.

   Args:
     columns_to_tensors: mapping from column objects to tensors. It is
       updated in place: the source column is asked to insert itself first
       when it is missing, then the bucketized result is stored under
       `self`.
   """
   source = self.source_column
   # The source column has to be materialized before it can be bucketized.
   if source not in columns_to_tensors:
     source.insert_transformed_feature(columns_to_tensors)
   source_tensor = columns_to_tensors[source]
   columns_to_tensors[self] = bucketization_op.bucketize(
       source_tensor, boundaries=list(self.boundaries))
 def test_normal_usecase(self):
   """Checks bucket indices for values below, on and above the boundaries.

   The expected output shows that a value equal to a boundary is assigned
   to the bucket above that boundary (e.g. 0 -> 1, 3 -> 2, 11 -> 4).
   """
   values = constant_op.constant([-5, 0, 2, 3, 5, 8, 10, 11, 12])
   bucketized = bucketization_op.bucketize(values, boundaries=[0, 3, 8, 11])
   expected_buckets = [0, 1, 1, 2, 2, 3, 3, 4, 4]
   with self.test_session() as sess:
     actual_buckets = sess.run(bucketized)
     self.assertAllEqual(expected_buckets, actual_buckets)
Esempio n. 3
0
 def insert_transformed_feature(self, columns_to_tensors):
   """Bucketizes the source column and records the result for this column.

   Args:
     columns_to_tensors: mapping from column objects to tensors, mutated in
       place. If the source column has no entry yet, it is asked to insert
       its own transformed feature before bucketization.
   """
   source_column = self.source_column
   if source_column not in columns_to_tensors:
     # Make sure the input tensor exists before reading it below.
     source_column.insert_transformed_feature(columns_to_tensors)
   raw_tensor = columns_to_tensors[source_column]
   bucket_edges = list(self.boundaries)
   columns_to_tensors[self] = bucketization_op.bucketize(
       raw_tensor, boundaries=bucket_edges)
Esempio n. 4
0
def _bucketize(instances, feature, schema, metadata):
    """Applies the bucketize transform to a numeric field.

    Args:
      instances: mapping from field name to the tensor holding that field's
        values; it is indexed with `feature.field`.
      feature: feature definition exposing `field` (the source field name)
        and `transform`, a mapping whose 'boundaries' entry is a
        comma-separated string of numbers.
      schema: mapping from field name to a field description exposing a
        `numeric` flag.
      metadata: not used by this transform.

    Returns:
      A one-hot encoded float tensor whose static shape is set to
      (None, number of boundaries + 1).

    Raises:
      ValueError: if the referenced field is not numeric.
    """
    field = schema[feature.field]
    if not field.numeric:
        raise ValueError(
            'A bucketize transform cannot be applied to non-numerical field '
            '"%s".' % feature.field)

    transform = feature.transform
    # Build a real list: on Python 3 `map` returns a one-shot iterator, which
    # has no len() and would be exhausted after its first use.
    boundaries = [float(b) for b in transform['boundaries'].split(',')]
    num_buckets = len(boundaries) + 1

    # TODO: Figure out how to use tf.case instead of this contrib op
    from tensorflow.contrib.layers.python.ops.bucketization_op import bucketize

    # Create a one-hot encoded tensor. The dimension of this tensor is the set
    # of buckets defined by N boundaries == N + 1.
    # A squeeze is needed to remove the extra dimension added to the shape.
    # NOTE(review): squeezing axis 1 presumes the input is (batch, 1) - confirm.
    value = instances[feature.field]

    value = tf.squeeze(tf.one_hot(bucketize(value, boundaries, name='bucket'),
                                  depth=num_buckets,
                                  on_value=1.0,
                                  off_value=0.0,
                                  name='one_hot'),
                       axis=1,
                       name='bucketize')
    value.set_shape((None, num_buckets))
    return value
 def test_normal_usecase(self):
     """Verifies the bucket index produced for each input value.

     With boundaries [0, 3, 8, 11] there are five buckets (0..4); inputs that
     sit exactly on a boundary are expected in the bucket above it.
     """
     inputs = constant_op.constant([-5, 0, 2, 3, 5, 8, 10, 11, 12])
     bucket_op = bucketization_op.bucketize(inputs, boundaries=[0, 3, 8, 11])
     want = [0, 1, 1, 2, 2, 3, 3, 4, 4]
     with self.test_session() as sess:
         got = sess.run(bucket_op)
         self.assertAllEqual(want, got)
Esempio n. 6
0
    def _transform_feature(self, inputs):
        """Bucketizes the source column, optionally jittering it first.

        When `self.add_random` is set and there are more than 30 boundaries
        after dropping the first one, normally distributed noise is added to
        the raw values before bucketization. In every other case the raw
        values are bucketized unchanged.

        Args:
          inputs: object whose `get` method returns the tensor for a column.

        Returns:
          The result of `bucketization_op.bucketize` over `self.boundaries`.
        """
        jitter = self.add_random
        values = inputs.get(self.source_column)
        if jitter:
            # The first boundary is excluded from the spread estimate.
            inner_bounds = np.asarray(self.boundaries[1:])
            if len(inner_bounds) > 30:
                # Noise scale: a tenth of the standard deviation of the
                # boundaries with 10 trimmed from each end.
                stddev = inner_bounds[10:-10].std() / 10.
                values = values + random_normal(
                    array_ops.shape(values), 0, stddev)
        return bucketization_op.bucketize(values,
                                          boundaries=list(self.boundaries),
                                          name="bucketize")
 def test_invalid_boundaries_order(self):
   """Unsorted boundaries must fail with InvalidArgumentError when run."""
   values = constant_op.constant([-5, 0])
   # 8 comes before 3, so the boundaries are not in increasing order.
   unsorted_boundaries = [0, 8, 3, 11]
   bucketized = bucketization_op.bucketize(
       values, boundaries=unsorted_boundaries)
   with self.test_session() as sess:
     # The error is expected from running the op, not from building it.
     with self.assertRaises(errors_impl.InvalidArgumentError):
       sess.run(bucketized)
 def test_invalid_boundaries_order(self):
     """Running bucketize with out-of-order boundaries raises an error."""
     inputs = constant_op.constant([-5, 0])
     bad_op = bucketization_op.bucketize(inputs, boundaries=[0, 8, 3, 11])
     expected_error = errors_impl.InvalidArgumentError
     with self.test_session() as sess:
         # Building the op succeeds; the failure is expected at execution.
         with self.assertRaises(expected_error):
             sess.run(bad_op)