def batched_boarders_and_data(
    data_min_size=5,
    data_max_size=10,
    examples_min_number=1,
    examples_max_number=4,
    example_min_size=1,
    example_max_size=3,
    dtype=np.float32,
    elements=None,
):
    """Hypothesis strategy yielding a (boarders, data) pair.

    `boarders` is an int32 array of shape [num_examples, example_size, 2]
    whose entries are offsets into `data`; `data` is a 1-D array of the
    requested dtype whose length is drawn from
    [data_min_size, data_max_size].
    """
    data_len = st.integers(min_value=data_min_size, max_value=data_max_size)
    num_examples = st.integers(
        min_value=examples_min_number, max_value=examples_max_number)
    example_len = st.integers(
        min_value=example_min_size, max_value=example_max_size)

    def build(dims):
        data_size, n_examples, example_size = dims
        # Boarder offsets are bounded by the data length so each
        # [start, end] pair indexes into the data array.
        boarders = hu.arrays(
            [n_examples, example_size, 2],
            dtype=np.int32,
            elements=st.integers(min_value=0, max_value=data_size),
        )
        data = hu.arrays([data_size], dtype, elements)
        return st.tuples(boarders, data)

    return st.tuples(data_len, num_examples, example_len).flatmap(build)
class TestEnsureClipped(hu.HypothesisTestCase):
    """Tests for the EnsureClipped operator, dense and sparse variants."""

    @given(X=hu.arrays(dims=[5, 10],
                       elements=hu.floats(min_value=-1.0, max_value=1.0)),
           in_place=st.booleans(),
           sparse=st.booleans(),
           indices=hu.arrays(dims=[5], elements=st.booleans()),
           **hu.gcs_cpu_only)
    def test_ensure_clipped(self, X, in_place, sparse, indices, gc, dc):
        # The sparse path is only exercised in-place; the out-of-place
        # sparse combination is skipped.
        if (not in_place) and sparse:
            return
        param = X.astype(np.float32)
        m, n = param.shape
        # Turn the boolean mask into the row indices the sparse op touches.
        indices = np.array(np.nonzero(indices)[0], dtype=np.int64)
        grad = np.random.rand(len(indices), n)
        workspace.FeedBlob("indices", indices)
        workspace.FeedBlob("grad", grad)
        workspace.FeedBlob("param", param)
        # Sparse variant takes (param, indices, grad); dense takes only param.
        input = ["param", "indices", "grad"] if sparse else ["param"]
        output = "param" if in_place else "output"
        op = core.CreateOperator("EnsureClipped", input, output, min=0.0)
        workspace.RunOperatorOnce(op)

        def ref():
            # Sparse: only the selected rows are clipped below at 0;
            # dense: the whole tensor is clipped.
            return (
                np.array([
                    np.clip(X[i], 0, None) if i in indices else X[i]
                    for i in range(m)
                ]) if sparse else np.clip(X, 0, None)
            )

        npt.assert_allclose(workspace.blobs[output], ref(), rtol=1e-3)
class TestLengthsTileOp(hu.HypothesisTestCase):
    """Tests for the LengthsTile operator."""

    @given(inputs=st.integers(min_value=1, max_value=20).flatmap(
        lambda size: st.tuples(
            hu.arrays([size]),
            hu.arrays([size], dtype=np.int32,
                      elements=st.integers(min_value=0, max_value=20)),
        )),
        **hu.gcs_cpu_only)
    def test_lengths_tile(self, inputs, gc, dc):
        data, lengths = inputs

        def lengths_tile_op(data, lengths):
            # Reference: repeat each element of `data` according to the
            # corresponding entry in `lengths`.
            return [np.concatenate([[d] * l for d, l in zip(data, lengths)])]

        op = core.CreateOperator("LengthsTile", ["data", "lengths"],
                                 ["output"])
        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[data, lengths],
            reference=lengths_tile_op,
        )
        # Gradient is only checked for input 0 (`data`); `lengths` is an
        # integer input and carries no gradient.
        self.assertGradientChecks(device_option=gc, op=op,
                                  inputs=[data, lengths],
                                  outputs_to_check=0,
                                  outputs_with_grads=[0])
class TestAPMeterOps(hu.HypothesisTestCase):
    """Tests for the APMeter (average precision) operator."""

    @given(predictions=hu.arrays(dims=[10, 3],
                                 elements=st.floats(allow_nan=False,
                                                    allow_infinity=False,
                                                    min_value=0.1,
                                                    max_value=1)),
           labels=hu.arrays(dims=[10, 3],
                            dtype=np.int32,
                            elements=st.integers(min_value=0, max_value=1)),
           **hu.gcs_cpu_only)
    def test_average_precision(self, predictions, labels, gc, dc):
        # Buffer size equals the number of samples, so AP is computed over
        # the full input.
        op = core.CreateOperator(
            "APMeter",
            ["predictions", "labels"],
            ["AP"],
            buffer_size=10,
        )

        def op_ref(predictions, labels):
            ap = calculate_ap(predictions, labels)
            return (ap, )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[predictions, labels],
            reference=op_ref)

    @given(predictions=hu.arrays(dims=[10, 3],
                                 elements=st.floats(allow_nan=False,
                                                    allow_infinity=False,
                                                    min_value=0.1,
                                                    max_value=1)),
           labels=hu.arrays(dims=[10, 3],
                            dtype=np.int32,
                            elements=st.integers(min_value=0, max_value=1)),
           **hu.gcs_cpu_only)
    def test_average_precision_small_buffer(self, predictions, labels, gc, dc):
        # Buffer smaller than the input: the operator can only retain the
        # most recent 5 samples.
        op_small_buffer = core.CreateOperator(
            "APMeter",
            ["predictions", "labels"],
            ["AP"],
            buffer_size=5,
        )

        def op_ref(predictions, labels):
            # We can only hold the last 5 in the buffer
            ap = calculate_ap(predictions[5:], labels[5:])
            return (ap, )

        self.assertReferenceChecks(
            device_option=gc,
            op=op_small_buffer,
            inputs=[predictions, labels],
            reference=op_ref
        )
def gen_with_size(args):
    """Build a strategy for (lengths, data[, pad_start, pad_end]).

    `args` is a (lengths, inner_shape) pair; the data tensor's leading
    dimension equals sum(lengths). When the enclosing `with_pad_data`
    flag is set, two extra tensors of `inner_shape` are generated
    (presumably padding values — confirmed by the enclosing test).
    """
    lengths, inner_shape = args
    total_rows = sum(lengths)
    lengths_arr = np.array(lengths, dtype=np.int32)
    data_strategy = hu.arrays([total_rows] + inner_shape)
    if not with_pad_data:
        return st.tuples(st.just(lengths_arr), data_strategy)
    return st.tuples(st.just(lengths_arr),
                     data_strategy,
                     hu.arrays(inner_shape),
                     hu.arrays(inner_shape))
def gen_with_size(args):
    """Strategy for (lengths, data) plus optional padding tensors.

    The data tensor has shape [sum(lengths)] + inner_shape. If the
    enclosing `with_pad_data` flag is true, two additional arrays of
    `inner_shape` are appended to the tuple.
    """
    lengths, inner_shape = args
    shaped_lengths = np.array(lengths, dtype=np.int32)
    components = [
        st.just(shaped_lengths),
        hu.arrays([sum(lengths)] + inner_shape),
    ]
    if with_pad_data:
        components.append(hu.arrays(inner_shape))
        components.append(hu.arrays(inner_shape))
    return st.tuples(*components)
class TestCrossEntropyOps(hu.HypothesisTestCase):
    """Tests for cross-entropy operators."""

    @given(
        inputs=st.lists(
            elements=st.integers(min_value=1, max_value=5),
            min_size=1,
            max_size=2,
            average_size=2,
        ).flatmap(
            lambda shape: st.tuples(
                hu.arrays(
                    dims=shape,
                    # Logits are kept away from zero (magnitude >= 0.1).
                    elements=st.one_of(
                        st.floats(min_value=-1.0, max_value=-0.1),
                        st.floats(min_value=0.1, max_value=1.0),
                    )),
                hu.arrays(
                    dims=shape,
                    # Binary targets.
                    elements=st.sampled_from([0.0, 1.0]),
                ),
            )
        ),
    )
    def test_sigmoid_cross_entropy_with_logits(self, inputs):
        logits, targets = inputs

        def sigmoid_xentr_logit_ref(logits, targets):
            s = sigmoid_cross_entropy_with_logits(logits, targets)
            # Operator averages the per-element loss over the innermost axis.
            m = np.mean(s, axis=len(logits.shape) - 1)
            return (m, )

        def sigmoid_xentr_logit_grad_ref(g_out, outputs, fwd_inputs):
            fwd_logits, fwd_targets = fwd_inputs
            inner_size = fwd_logits.shape[-1]
            # Gradient of the mean sigmoid cross entropy w.r.t. logits;
            # no gradient flows to targets (second return is None).
            m = fwd_targets - sigmoid(fwd_logits)
            g_in = -np.expand_dims(g_out, axis=-1) * m / inner_size
            return (g_in, None)

        op = core.CreateOperator(
            'SigmoidCrossEntropyWithLogits',
            ['logits', 'targets'],
            ['xentropy'])
        self.assertReferenceChecks(
            hu.cpu_do,
            op,
            [logits, targets],
            sigmoid_xentr_logit_ref,
            output_to_grad='xentropy',
            grad_reference=sigmoid_xentr_logit_grad_ref)
class TestKeySplitOps(hu.HypothesisTestCase):
    """Tests for the KeySplit operator."""

    @given(
        X=hu.arrays(
            dims=[1000],
            dtype=np.int64,
            elements=st.integers(min_value=0, max_value=100)
        ),
        **hu.gcs_cpu_only
    )
    def test_key_split_op(self, X, gc, dc):
        # One output blob per distinct key value in [0, max(X)].
        categorical_limit = max(X) + 1
        workspace.ResetWorkspace()
        workspace.FeedBlob('X', X)
        output_blobs = ['Y_%d' % i for i in range(categorical_limit)]
        op = core.CreateOperator(
            'KeySplit', ['X'], output_blobs,
            categorical_limit=categorical_limit
        )
        workspace.RunOperatorOnce(op)
        output_vecs = [
            workspace.blobs[output_blobs[i]] for i in range(categorical_limit)
        ]
        # Reference: bucket each input position by its key value.
        expected_output_vecs = [[] for _ in range(categorical_limit)]
        for i, x in enumerate(X):
            expected_output_vecs[x].append(i)
        for i in range(categorical_limit):
            np.testing.assert_array_equal(
                output_vecs[i],
                np.array(expected_output_vecs[i], dtype=np.int32)
            )
def test_sparse_lengths_fp16(self, input, data_strategy, is_mean, gc, dc):
    """Check SparseLengthsSum/SparseLengthsMean against numpy references.

    `input` is the table to gather from; `lengths` and `indices` are
    drawn interactively from `data_strategy` so that the number of
    gathered rows matches sum(lengths).
    """
    m = input.shape[0]
    lengths = data_strategy.draw(
        hu.tensor(
            max_dim=1,
            max_value=input.shape[0],
            dtype=np.int32,
            elements=st.integers(min_value=0, max_value=27),
        ))
    # The indices array must contain exactly sum(lengths) row ids.
    lengths_sum = int(np.sum(lengths).item())
    indices = data_strategy.draw(
        hu.arrays([lengths_sum],
                  dtype=np.int64,
                  elements=st.sampled_from(np.arange(m))))
    if is_mean:
        op = core.CreateOperator("SparseLengthsMean",
                                 ["input", "indices", "lengths"], "out")
        self.assertReferenceChecks(gc, op, [input, indices, lengths],
                                   sparse_lengths_mean_ref)
    else:
        op = core.CreateOperator("SparseLengthsSum",
                                 ["input", "indices", "lengths"], "out")
        self.assertReferenceChecks(gc, op, [input, indices, lengths],
                                   sparse_lengths_sum_ref)
class TestSinusoidPositionEncodingOp(hu.HypothesisTestCase):
    """Tests for the SinusoidPositionEncoding operator."""

    @given(positions=hu.arrays(
        dims=[MAX_TEST_SEQUENCE_LENGTH, MAX_TEST_BATCH_SIZE],
        dtype=np.int32,
        elements=st.integers(1, MAX_TEST_SEQUENCE_LENGTH)),
        embedding_size=st.integers(1, MAX_TEST_EMBEDDING_SIZE),
        alpha=st.floats(MIN_TEST_ALPHA, MAX_TEST_ALPHA),
        **hu.gcs_cpu_only)
    def test_sinusoid_embedding(self, positions, embedding_size, alpha,
                                gc, dc):
        op = core.CreateOperator("SinusoidPositionEncoding",
                                 ["positions"],
                                 ["output"],
                                 embedding_size=embedding_size,
                                 alpha=alpha)

        def sinusoid_encoding(dim, position):
            # Even embedding dims use sine, odd dims cosine, with
            # frequency controlled by `alpha` and the dim index.
            x = 1. * position / math.pow(alpha, 1. * dim / embedding_size)
            return math.sin(x) if dim % 2 == 0 else math.cos(x)

        def sinusoid_embedding_op(positions):
            # Reference output shape: (seq_len, batch, embedding_size).
            output_shape = (len(positions), len(positions[0]),
                            embedding_size)
            ar = np.zeros(output_shape)
            for i, position_vector in enumerate(positions):
                for j, position in enumerate(position_vector):
                    for k in range(embedding_size):
                        ar[i, j, k] = sinusoid_encoding(k, position)
            return [ar]

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[positions],
            reference=sinusoid_embedding_op,
        )
def _glu_old_input(draw):
    """Draw an (X, axis) pair for GLU tests.

    Produces a float32 tensor of 2-4 dims whose size along `axis` is
    even, as GLU splits that dimension in half.
    """
    shape = draw(st.lists(st.integers(min_value=1, max_value=5),
                          min_size=1, max_size=3))
    split_axis = draw(st.integers(min_value=0, max_value=len(shape)))
    # The axis dimension must be divisible by two.
    even_dim = 2 * draw(st.integers(min_value=1, max_value=2))
    shape.insert(split_axis, even_dim)
    tensor = draw(hu.arrays(shape, np.float32, None))
    return (tensor, split_axis)
def _data_and_scale(data_min_size=4, data_max_size=10,
                    examples_min_number=1, examples_max_number=4,
                    dtype=np.float32, elements=None):
    """Strategy for a (data, scale) pair.

    `data` has shape [num_examples, data_size]; `scale` is an int32
    vector of length num_examples with values in [5, 10].
    """
    num_examples = st.integers(
        min_value=examples_min_number, max_value=examples_max_number)
    data_size = st.integers(
        min_value=data_min_size, max_value=data_max_size)

    def make_pair(dims):
        n, size = dims
        data = hu.arrays([n, size], dtype=dtype)
        scale = hu.arrays([n], np.int32,
                          st.integers(min_value=5, max_value=10))
        return st.tuples(data, scale)

    return st.tuples(num_examples, data_size).flatmap(make_pair)
def _data_and_scale(
        data_min_size=4,
        data_max_size=10,
        examples_min_number=1,
        examples_max_number=4,
        dtype=np.float32,
        elements=None):
    """Strategy producing (data, scale).

    data: [num_examples, data_size] of `dtype`.
    scale: int32 vector of length num_examples, entries in [5, 10].
    """
    shape_strategy = st.tuples(
        st.integers(min_value=examples_min_number,
                    max_value=examples_max_number),
        st.integers(min_value=data_min_size, max_value=data_max_size),
    )
    return shape_strategy.flatmap(lambda shape: st.tuples(
        hu.arrays([shape[0], shape[1]], dtype=dtype),
        hu.arrays([shape[0]], np.int32,
                  st.integers(min_value=5, max_value=10)),
    ))
def _data_and_scale(data_min_size=4, data_max_size=10,
                    examples_min_number=1, examples_max_number=4,
                    dtype=np.float32, elements=None):
    """Strategy producing (data, scale) with a randomly sampled scale dtype.

    data: [num_examples, data_size] of `dtype`.
    scale: vector of length num_examples whose dtype is one of
    float32/int32/int64, with non-negative values up to 10000.
    """
    params_ = st.tuples(
        st.integers(min_value=examples_min_number,
                    max_value=examples_max_number),
        st.integers(min_value=data_min_size, max_value=data_max_size),
        st.sampled_from([np.float32, np.int32, np.int64]))

    def build(param_):
        num_rows, row_size, scale_dtype = param_
        # Float scales come from hu.floats; integer scales from st.integers.
        if scale_dtype in [np.float32]:
            scale_elements = hu.floats(0.0, 10000.0)
        else:
            scale_elements = st.integers(0, 10000)
        return st.tuples(
            hu.arrays([num_rows, row_size], dtype=dtype),
            hu.arrays([num_rows], dtype=scale_dtype,
                      elements=scale_elements),
        )

    return params_.flatmap(build)
def get_input_tensors():
    """Return a strategy for a 2-D float32 tensor of random size.

    Height and width are chosen uniformly in [1, 9]; element values are
    integers in [0, 100] (cast to float32 by the array strategy).
    """
    rows = np.random.randint(1, 10)
    cols = np.random.randint(1, 10)
    return hu.arrays(
        dims=[rows, cols],
        dtype=np.float32,
        elements=st.integers(min_value=0, max_value=100),
    )
def batched_boarders_and_data(
        data_min_size=5, data_max_size=10,
        examples_min_number=1, examples_max_number=4,
        example_min_size=1, example_max_size=3,
        dtype=np.float32, elements=None):
    """Strategy producing a (boarders, data) pair.

    boarders: int32 array [num_examples, example_size, 2] of offsets into
    `data` (each bounded by the data length).
    data: 1-D array of `dtype` with length in [data_min_size, data_max_size].
    """
    shape_strategy = st.tuples(
        st.integers(min_value=data_min_size, max_value=data_max_size),
        st.integers(min_value=examples_min_number,
                    max_value=examples_max_number),
        st.integers(min_value=example_min_size, max_value=example_max_size),
    )
    return shape_strategy.flatmap(lambda shape: st.tuples(
        hu.arrays(
            [shape[1], shape[2], 2],
            dtype=np.int32,
            elements=st.integers(min_value=0, max_value=shape[0]),
        ),
        hu.arrays([shape[0]], dtype, elements),
    ))
def _data_and_scale(
        data_min_size=4,
        data_max_size=10,
        examples_min_number=1,
        examples_max_number=4,
        dtype=np.float32,
        elements=None):
    """Strategy producing (data, scale) with a sampled scale dtype.

    data: [num_examples, data_size] of `dtype`.
    scale: vector of length num_examples; dtype is one of
    float32/int32/int64 with non-negative values up to 10000.
    """
    def build(params):
        num_rows, row_size, scale_dtype = params
        scale_elements = (
            st.floats(0.0, 10000.0)
            if scale_dtype in [np.float32]
            else st.integers(0, 10000)
        )
        return st.tuples(
            hu.arrays([num_rows, row_size], dtype=dtype),
            hu.arrays([num_rows], dtype=scale_dtype,
                      elements=scale_elements),
        )

    return st.tuples(
        st.integers(min_value=examples_min_number,
                    max_value=examples_max_number),
        st.integers(min_value=data_min_size, max_value=data_max_size),
        st.sampled_from([np.float32, np.int32, np.int64])
    ).flatmap(build)
class TestSinusoidPositionEncodingOp(serial.SerializedTestCase):
    """Serialized tests for the SinusoidPositionEncoding operator."""

    @given(
        positions_vec=hu.arrays(
            dims=[MAX_TEST_SEQUENCE_LENGTH],
            dtype=np.int32,
            elements=st.integers(1, MAX_TEST_SEQUENCE_LENGTH)
        ),
        embedding_size=st.integers(1, MAX_TEST_EMBEDDING_SIZE),
        batch_size=st.integers(1, MAX_TEST_BATCH_SIZE),
        alpha=st.floats(MIN_TEST_ALPHA, MAX_TEST_ALPHA),
        amplitude=st.floats(MIN_TEST_AMPLITUDE, MAX_TEST_AMPLITUDE),
        **hu.gcs_cpu_only
    )
    @settings(deadline=10000)
    def test_sinusoid_embedding(
        self, positions_vec, embedding_size, batch_size, alpha, amplitude,
        gc, dc
    ):
        # Every batch entry shares the same position vector; resulting
        # shape is (sequence_length, batch_size).
        positions = np.tile(positions_vec, [batch_size, 1]).transpose()
        op = core.CreateOperator(
            "SinusoidPositionEncoding",
            ["positions"],
            ["output"],
            embedding_size=embedding_size,
            alpha=alpha,
            amplitude=amplitude,
        )

        def sinusoid_encoding(dim, position):
            # Even dims use sine, odd dims cosine, scaled by `amplitude`.
            x = 1. * position / math.pow(alpha, 1. * dim / embedding_size)
            if dim % 2 == 0:
                return amplitude * math.sin(x)
            else:
                return amplitude * math.cos(x)

        def sinusoid_embedding_op(positions):
            # Reference output: (seq_len, batch, embedding_size).
            output_shape = (len(positions), len(positions[0]),
                            embedding_size)
            ar = np.zeros(output_shape)
            for i, position_vector in enumerate(positions):
                for j, position in enumerate(position_vector):
                    for k in range(embedding_size):
                        ar[i, j, k] = sinusoid_encoding(k, position)
            return [ar]

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[positions],
            reference=sinusoid_embedding_op,
        )
class TestRegularizerContext(LayersTestCase):
    """Tests that UseRegularizer makes regularizers available via context."""

    @given(X=hu.arrays(dims=[2, 5]))
    def test_regularizer_context(self, X):
        weight_reg_out = L1Norm(0.2)
        bias_reg_out = L1Norm(0)
        regularizers = {"WEIGHT": weight_reg_out, "BIAS": bias_reg_out}
        output_dims = 2
        input_record = self.new_record(schema.Scalar((np.float32, (5, ))))
        schema.FeedRecord(input_record, [X])
        with UseRegularizer(regularizers):
            # Regularizers fetched from the context must be the very
            # objects registered above.
            weight_reg = RegularizerContext.current().get_regularizer(
                "WEIGHT")
            bias_reg = RegularizerContext.current().get_regularizer("BIAS")
            optim = SgdOptimizer(0.15)
            assert (weight_reg == weight_reg_out
                    ), "fail to get correct weight reg from context"
            assert bias_reg == bias_reg_out, "fail to get correct bias reg from context"
            fc_output = self.model.FC(
                input_record,
                output_dims,
                weight_optim=optim,
                bias_optim=optim,
                weight_reg=weight_reg,
                bias_reg=bias_reg,
            )
            # model.output_schema has to a struct
            self.model.output_schema = schema.Struct(
                ("fc_output", fc_output))
            self.assertEqual(schema.Scalar((np.float32, (output_dims, ))),
                             fc_output)
            _, train_net = layer_model_instantiator.generate_training_nets(
                self.model)
            ops = train_net.Proto().op
            ops_type_list = [ops[i].type for i in range(len(ops))]
            # NOTE(review): counts presumably correspond to one LpNorm +
            # LpNormGradient per regularized parameter (weight and bias)
            # plus Scale ops added around them — confirm against the
            # regularizer implementation if these change.
            assert ops_type_list.count("LpNorm") == 2
            assert ops_type_list.count("Scale") == 4
            assert ops_type_list.count("LpNormGradient") == 2
class TestLayers(LayersTestCase): def testAddLoss(self): input_record_LR = self.new_record( schema.Struct(('label', schema.Scalar((np.float64, (1, )))), ('prediction', schema.Scalar((np.float32, (2, )))), ('weight', schema.Scalar((np.float64, (1, )))))) loss_LR = self.model.BatchLRLoss(input_record_LR) self.model.add_loss(loss_LR) assert 'unnamed' in self.model.loss self.assertEqual(schema.Scalar((np.float32, tuple())), self.model.loss.unnamed) self.assertEqual(loss_LR, self.model.loss.unnamed) self.model.add_loss(loss_LR, 'addLoss') assert 'addLoss' in self.model.loss self.assertEqual(schema.Scalar((np.float32, tuple())), self.model.loss.addLoss) self.assertEqual(loss_LR, self.model.loss.addLoss) self.model.add_loss( schema.Scalar(dtype=np.float32, blob=core.BlobReference('loss_blob_1')), 'addLoss') assert 'addLoss_auto_0' in self.model.loss self.assertEqual(schema.Scalar((np.float32, tuple())), self.model.loss.addLoss_auto_0) assert core.BlobReference( 'loss_blob_1') in self.model.loss.field_blobs() self.model.add_loss( schema.Struct( ('structName', schema.Scalar(dtype=np.float32, blob=core.BlobReference('loss_blob_2')))), 'addLoss') assert 'addLoss_auto_1' in self.model.loss self.assertEqual( schema.Struct(('structName', schema.Scalar( (np.float32, tuple())))), self.model.loss.addLoss_auto_1) assert core.BlobReference( 'loss_blob_2') in self.model.loss.field_blobs() loss_in_tuple_0 = schema.Scalar( dtype=np.float32, blob=core.BlobReference('loss_blob_in_tuple_0')) loss_in_tuple_1 = schema.Scalar( dtype=np.float32, blob=core.BlobReference('loss_blob_in_tuple_1')) loss_tuple = schema.NamedTuple('loss_in_tuple', *[loss_in_tuple_0, loss_in_tuple_1]) self.model.add_loss(loss_tuple, 'addLoss') assert 'addLoss_auto_2' in self.model.loss self.assertEqual( schema.Struct( ('loss_in_tuple_0', schema.Scalar((np.float32, tuple()))), ('loss_in_tuple_1', schema.Scalar((np.float32, tuple())))), self.model.loss.addLoss_auto_2) assert core.BlobReference('loss_blob_in_tuple_0')\ 
in self.model.loss.field_blobs() assert core.BlobReference('loss_blob_in_tuple_1')\ in self.model.loss.field_blobs() def _test_net(self, net, ops_list): """ Helper function to assert the net contains some set of operations and then to run the net. Inputs: net -- the network to test and run ops_list -- the list of operation specifications to check for in the net """ ops_output = self.assertNetContainOps(net, ops_list) workspace.RunNetOnce(net) return ops_output def testFCWithoutBias(self): output_dims = 2 fc_without_bias = self.model.FCWithoutBias( self.model.input_feature_schema.float_features, output_dims) self.model.output_schema = fc_without_bias self.assertEqual(schema.Scalar((np.float32, (output_dims, ))), fc_without_bias) train_init_net, train_net = self.get_training_nets() init_ops = self.assertNetContainOps(train_init_net, [ OpSpec("UniformFill", None, None), ]) mat_mul_spec = OpSpec("MatMul", [ self.model.input_feature_schema.float_features(), init_ops[0].output[0], ], fc_without_bias.field_blobs()) self.assertNetContainOps(train_net, [mat_mul_spec]) predict_net = self.get_predict_net() self.assertNetContainOps(predict_net, [mat_mul_spec]) def testSamplingTrain(self): output_dims = 1000 indices = self.new_record(schema.Scalar((np.int32, (10, )))) sampling_prob = self.new_record(schema.Scalar((np.float32, (10, )))) sampled_fc = self.model.SamplingTrain( schema.Struct( ('input', self.model.input_feature_schema.float_features), ('indices', indices), ('sampling_prob', sampling_prob), ), "FC", output_dims, ) self.model.output_schema = sampled_fc # Check that we don't add prediction layer into the model self.assertEqual(1, len(self.model.layers)) self.assertEqual(schema.Scalar((np.float32, (output_dims, ))), sampled_fc) train_init_net, train_net = self.get_training_nets() init_ops = self.assertNetContainOps(train_init_net, [ OpSpec("UniformFill", None, None), OpSpec("UniformFill", None, None), ]) sampled_fc_layer = self.model.layers[0] gather_w_spec = 
OpSpec("Gather", [ init_ops[0].output[0], indices(), ], [sampled_fc_layer._prediction_layer.train_param_blobs[0]]) gather_b_spec = OpSpec("Gather", [ init_ops[1].output[0], indices(), ], [sampled_fc_layer._prediction_layer.train_param_blobs[1]]) train_fc_spec = OpSpec("FC", [ self.model.input_feature_schema.float_features(), ] + sampled_fc_layer._prediction_layer.train_param_blobs, sampled_fc.field_blobs()) log_spec = OpSpec("Log", [sampling_prob()], [None]) sub_spec = OpSpec("Sub", [sampled_fc.field_blobs()[0], None], sampled_fc.field_blobs()) train_ops = self.assertNetContainOps( train_net, [gather_w_spec, gather_b_spec, train_fc_spec, log_spec, sub_spec]) self.assertEqual(train_ops[3].output[0], train_ops[4].input[1]) predict_net = self.get_predict_net() self.assertNetContainOps(predict_net, [ OpSpec("FC", [ self.model.input_feature_schema.float_features(), init_ops[0].output[0], init_ops[1].output[0], ], sampled_fc.field_blobs()) ]) def testBatchLRLoss(self): input_record = self.new_record( schema.Struct(('label', schema.Scalar((np.float64, (1, )))), ('prediction', schema.Scalar((np.float32, (2, )))), ('weight', schema.Scalar((np.float64, (1, )))))) loss = self.model.BatchLRLoss(input_record) self.assertEqual(schema.Scalar((np.float32, tuple())), loss) def testBatchMSELoss(self): input_record = self.new_record( schema.Struct( ('label', schema.Scalar((np.float64, (1, )))), ('prediction', schema.Scalar((np.float32, (2, )))), )) loss = self.model.BatchMSELoss(input_record) self.assertEqual(schema.Scalar((np.float32, tuple())), loss) def testBatchSigmoidCrossEntropyLoss(self): input_record = self.new_record( schema.Struct(('label', schema.Scalar((np.float32, (32, )))), ('prediction', schema.Scalar((np.float32, (32, )))))) loss = self.model.BatchSigmoidCrossEntropyLoss(input_record) self.assertEqual(schema.Scalar((np.float32, tuple())), loss) def testBatchSoftmaxLoss(self): input_record = self.new_record( schema.Struct(('label', schema.Scalar((np.float32, 
tuple()))), ('prediction', schema.Scalar((np.float32, (32, )))))) loss = self.model.BatchSoftmaxLoss(input_record) self.assertEqual( schema.Struct( ('softmax', schema.Scalar((np.float32, (32, )))), ('loss', schema.Scalar(np.float32)), ), loss) def testBatchSoftmaxLossWeight(self): input_record = self.new_record( schema.Struct(('label', schema.Scalar((np.float32, tuple()))), ('prediction', schema.Scalar((np.float32, (32, )))), ('weight', schema.Scalar((np.float64, (1, )))))) loss = self.model.BatchSoftmaxLoss(input_record) self.assertEqual( schema.Struct( ('softmax', schema.Scalar((np.float32, (32, )))), ('loss', schema.Scalar(np.float32)), ), loss) @given( X=hu.arrays(dims=[2, 5]), ) def testBatchNormalization(self, X): input_record = self.new_record(schema.Scalar((np.float32, (5, )))) schema.FeedRecord(input_record, [X]) bn_output = self.model.BatchNormalization(input_record) self.assertEqual(schema.Scalar((np.float32, (5, ))), bn_output) self.model.output_schema = schema.Struct() train_init_net, train_net = self.get_training_nets() init_ops = self.assertNetContainOps(train_init_net, [ OpSpec("ConstantFill", None, None), OpSpec("ConstantFill", None, None), OpSpec("ConstantFill", None, None), OpSpec("ConstantFill", None, None), ]) input_blob = input_record.field_blobs()[0] output_blob = bn_output.field_blobs()[0] expand_dims_spec = OpSpec( "ExpandDims", [input_blob], None, ) train_bn_spec = OpSpec( "SpatialBN", [ None, init_ops[0].output[0], init_ops[1].output[0], init_ops[2].output[0], init_ops[3].output[0] ], [ output_blob, init_ops[2].output[0], init_ops[3].output[0], None, None ], { 'is_test': 0, 'order': 'NCHW', 'momentum': 0.9 }, ) test_bn_spec = OpSpec( "SpatialBN", [ None, init_ops[0].output[0], init_ops[1].output[0], init_ops[2].output[0], init_ops[3].output[0] ], [output_blob], { 'is_test': 1, 'order': 'NCHW', 'momentum': 0.9 }, ) squeeze_spec = OpSpec( "Squeeze", [output_blob], [output_blob], ) self.assertNetContainOps( train_net, [expand_dims_spec, 
train_bn_spec, squeeze_spec]) eval_net = self.get_eval_net() self.assertNetContainOps( eval_net, [expand_dims_spec, test_bn_spec, squeeze_spec]) predict_net = self.get_predict_net() self.assertNetContainOps( predict_net, [expand_dims_spec, test_bn_spec, squeeze_spec]) workspace.RunNetOnce(train_init_net) workspace.RunNetOnce(train_net) schema.FeedRecord(input_record, [X]) workspace.RunNetOnce(eval_net) schema.FeedRecord(input_record, [X]) workspace.RunNetOnce(predict_net) @given( X=hu.arrays(dims=[5, 2]), num_to_collect=st.integers(min_value=1, max_value=10), ) def testLastNWindowCollector(self, X, num_to_collect): input_record = self.new_record(schema.Scalar(np.float32)) schema.FeedRecord(input_record, [X]) last_n = self.model.LastNWindowCollector(input_record, num_to_collect) self.run_train_net_forward_only() output_record = schema.FetchRecord(last_n) start = max(0, 5 - num_to_collect) npt.assert_array_equal(X[start:], output_record()) def testUniformSampling(self): input_record = self.new_record(schema.Scalar(np.int32)) input_array = np.array([3, 10, 11, 15, 20, 99], dtype=np.int32) schema.FeedRecord(input_record, [input_array]) num_samples = 20 num_elements = 100 uniform_sampling_output = self.model.UniformSampling( input_record, num_samples, num_elements) self.model.loss = uniform_sampling_output self.run_train_net() samples = workspace.FetchBlob(uniform_sampling_output.samples()) sampling_prob = workspace.FetchBlob( uniform_sampling_output.sampling_prob()) self.assertEqual(num_samples, len(samples)) np.testing.assert_array_equal(input_array, samples[:len(input_array)]) np.testing.assert_almost_equal( np.array([float(num_samples) / num_elements] * num_samples, dtype=np.float32), sampling_prob) def testGatherRecord(self): indices = np.array([1, 3, 4], dtype=np.int32) dense = np.array(list(range(20)), dtype=np.float32).reshape(10, 2) lengths = np.array(list(range(10)), dtype=np.int32) items = np.array(list(range(lengths.sum())), dtype=np.int64) items_lengths = 
np.array(list(range(lengths.sum())), dtype=np.int32) items_items = np.array(list(range(items_lengths.sum())), dtype=np.int64) record = self.new_record( schema.Struct( ('dense', schema.Scalar(np.float32)), ('sparse', schema.Struct( ('list', schema.List(np.int64)), ('list_of_list', schema.List(schema.List(np.int64))), )), ('empty_struct', schema.Struct()))) indices_record = self.new_record(schema.Scalar(np.int32)) input_record = schema.Struct( ('indices', indices_record), ('record', record), ) schema.FeedRecord(input_record, [ indices, dense, lengths, items, lengths, items_lengths, items_items ]) gathered_record = self.model.GatherRecord(input_record) self.assertTrue(schema.equal_schemas(gathered_record, record)) self.run_train_net_forward_only() gathered_dense = workspace.FetchBlob(gathered_record.dense()) np.testing.assert_array_equal( np.concatenate([dense[i:i + 1] for i in indices]), gathered_dense) gathered_lengths = workspace.FetchBlob( gathered_record.sparse.list.lengths()) np.testing.assert_array_equal( np.concatenate([lengths[i:i + 1] for i in indices]), gathered_lengths) gathered_items = workspace.FetchBlob( gathered_record.sparse.list.items()) offsets = lengths.cumsum() - lengths np.testing.assert_array_equal( np.concatenate( [items[offsets[i]:offsets[i] + lengths[i]] for i in indices]), gathered_items) gathered_items_lengths = workspace.FetchBlob( gathered_record.sparse.list_of_list.items.lengths()) np.testing.assert_array_equal( np.concatenate([ items_lengths[offsets[i]:offsets[i] + lengths[i]] for i in indices ]), gathered_items_lengths) nested_offsets = [] nested_lengths = [] nested_offset = 0 j = 0 for l in lengths: nested_offsets.append(nested_offset) nested_length = 0 for _i in range(l): nested_offset += items_lengths[j] nested_length += items_lengths[j] j += 1 nested_lengths.append(nested_length) gathered_items_items = workspace.FetchBlob( gathered_record.sparse.list_of_list.items.items()) np.testing.assert_array_equal( np.concatenate([ 
items_items[nested_offsets[i]:nested_offsets[i] + nested_lengths[i]] for i in indices ]), gathered_items_items) def testMapToRange(self): input_record = self.new_record(schema.Scalar(np.int32)) indices_blob = self.model.MapToRange(input_record, max_index=100).indices self.model.output_schema = schema.Struct() train_init_net, train_net = self.get_training_nets() schema.FeedRecord( input_record, [np.array([10, 3, 20, 99, 15, 11, 3, 11], dtype=np.int32)]) workspace.RunNetOnce(train_init_net) workspace.RunNetOnce(train_net) indices = workspace.FetchBlob(indices_blob()) np.testing.assert_array_equal( np.array([1, 2, 3, 4, 5, 6, 2, 6], dtype=np.int32), indices) schema.FeedRecord( input_record, [np.array([10, 3, 23, 35, 60, 15, 10, 15], dtype=np.int32)]) workspace.RunNetOnce(train_net) indices = workspace.FetchBlob(indices_blob()) np.testing.assert_array_equal( np.array([1, 2, 7, 8, 9, 5, 1, 5], dtype=np.int32), indices) eval_net = self.get_eval_net() schema.FeedRecord( input_record, [np.array([10, 3, 23, 35, 60, 15, 200], dtype=np.int32)]) workspace.RunNetOnce(eval_net) indices = workspace.FetchBlob(indices_blob()) np.testing.assert_array_equal( np.array([1, 2, 7, 8, 9, 5, 0], dtype=np.int32), indices) schema.FeedRecord( input_record, [np.array([10, 3, 23, 15, 101, 115], dtype=np.int32)]) workspace.RunNetOnce(eval_net) indices = workspace.FetchBlob(indices_blob()) np.testing.assert_array_equal( np.array([1, 2, 7, 5, 0, 0], dtype=np.int32), indices) predict_net = self.get_predict_net() schema.FeedRecord( input_record, [np.array([3, 3, 20, 23, 151, 35, 60, 15, 200], dtype=np.int32)]) workspace.RunNetOnce(predict_net) indices = workspace.FetchBlob(indices_blob()) np.testing.assert_array_equal( np.array([2, 2, 3, 7, 0, 8, 9, 5, 0], dtype=np.int32), indices) def testSelectRecordByContext(self): float_features = self.model.input_feature_schema.float_features float_array = np.array([1.0, 2.0], dtype=np.float32) schema.FeedRecord(float_features, [float_array]) with 
Tags(Tags.EXCLUDE_FROM_PREDICTION): log_float_features = self.model.Log(float_features, 1) joined = self.model.SelectRecordByContext( schema.Struct( (InstantiationContext.PREDICTION, float_features), (InstantiationContext.TRAINING, log_float_features), # TODO: TRAIN_ONLY layers are also generated in eval (InstantiationContext.EVAL, log_float_features), )) # model.output_schema has to a struct self.model.output_schema = schema.Struct(('joined', joined)) predict_net = layer_model_instantiator.generate_predict_net(self.model) workspace.RunNetOnce(predict_net) predict_output = schema.FetchRecord(predict_net.output_record()) npt.assert_array_equal(float_array, predict_output['joined']()) eval_net = layer_model_instantiator.generate_eval_net(self.model) workspace.RunNetOnce(eval_net) eval_output = schema.FetchRecord(eval_net.output_record()) npt.assert_array_equal(np.log(float_array), eval_output['joined']()) _, train_net = ( layer_model_instantiator.generate_training_nets_forward_only( self.model)) workspace.RunNetOnce(train_net) train_output = schema.FetchRecord(train_net.output_record()) npt.assert_array_equal(np.log(float_array), train_output['joined']()) def testFunctionalLayer(self): def normalize(net, in_record, out_record): mean = net.ReduceFrontMean(in_record(), 1) net.Sub([in_record(), mean], out_record(), broadcast=1) normalized = self.model.Functional( self.model.input_feature_schema.float_features, 1, normalize, name="normalizer") # Attach metadata to one of the outputs and use it in FC normalized.set_type((np.float32, 32)) self.model.output_schema = self.model.FC(normalized, 2) predict_net = layer_model_instantiator.generate_predict_net(self.model) ops = predict_net.Proto().op assert len(ops) == 3 assert ops[0].type == "ReduceFrontMean" assert ops[1].type == "Sub" assert ops[2].type == "FC" assert len(ops[0].input) == 1 assert ops[0].input[0] ==\ self.model.input_feature_schema.float_features() assert len(ops[1].output) == 1 assert ops[1].output[0] in 
ops[2].input def testFunctionalLayerHelper(self): mean = self.model.ReduceFrontMean( self.model.input_feature_schema.float_features, 1) normalized = self.model.Sub(schema.Tuple( self.model.input_feature_schema.float_features, mean), 1, broadcast=1) # Attach metadata to one of the outputs and use it in FC normalized.set_type((np.float32, (32, ))) self.model.output_schema = self.model.FC(normalized, 2) predict_net = layer_model_instantiator.generate_predict_net(self.model) ops = predict_net.Proto().op assert len(ops) == 3 assert ops[0].type == "ReduceFrontMean" assert ops[1].type == "Sub" assert ops[2].type == "FC" assert len(ops[0].input) == 1 assert ops[0].input[0] ==\ self.model.input_feature_schema.float_features() assert len(ops[1].output) == 1 assert ops[1].output[0] in ops[2].input def testFunctionalLayerHelperAutoInference(self): softsign = self.model.Softsign( schema.Tuple(self.model.input_feature_schema.float_features), 1) assert softsign.field_type().base == np.float32 assert softsign.field_type().shape == (32, ) self.model.output_schema = self.model.FC(softsign, 2) predict_net = layer_model_instantiator.generate_predict_net(self.model) ops = predict_net.Proto().op assert len(ops) == 2 assert ops[0].type == "Softsign" assert ops[1].type == "FC" assert len(ops[0].input) == 1 assert ops[0].input[0] ==\ self.model.input_feature_schema.float_features() assert len(ops[0].output) == 1 assert ops[0].output[0] in ops[1].input def testFunctionalLayerHelperAutoInferenceScalar(self): loss = self.model.AveragedLoss(self.model.input_feature_schema, 1) self.assertEqual(1, len(loss.field_types())) self.assertEqual(np.float32, loss.field_types()[0].base) self.assertEqual(tuple(), loss.field_types()[0].shape) def testFunctionalLayerInputCoercion(self): one = self.model.global_constants['ONE'] two = self.model.Add([one, one], 1) self.model.loss = two self.run_train_net() data = workspace.FetchBlob(two.field_blobs()[0]) np.testing.assert_array_equal([2.0], data) def 
testFunctionalLayerWithOutputNames(self): k = 3 topk = self.model.TopK( self.model.input_feature_schema, output_names_or_num=['values', 'indices'], k=k, ) self.assertEqual(2, len(topk.field_types())) self.assertEqual(np.float32, topk.field_types()[0].base) self.assertEqual((k, ), topk.field_types()[0].shape) self.assertEqual(np.int32, topk.field_types()[1].base) self.assertEqual((k, ), topk.field_types()[1].shape) self.assertEqual(['TopK/values', 'TopK/indices'], topk.field_blobs()) def testFunctionalLayerWithOutputDtypes(self): loss = self.model.AveragedLoss( self.model.input_feature_schema, 1, output_dtypes=(np.float32, (1, )), ) self.assertEqual(1, len(loss.field_types())) self.assertEqual(np.float32, loss.field_types()[0].base) self.assertEqual((1, ), loss.field_types()[0].shape) def testPropagateRequestOnly(self): # test case when output is request only input_record = self.new_record( schema.Struct( ('input1', schema.Scalar((np.float32, (32, )))), ('input2', schema.Scalar((np.float32, (64, )))), ('input3', schema.Scalar((np.float32, (16, )))), )) set_request_only(input_record) concat_output = self.model.Concat(input_record) self.assertEqual(is_request_only_scalar(concat_output), True) # test case when output is not request only input_record2 = self.new_record( schema.Struct(('input4', schema.Scalar( (np.float32, (100, )))))) + input_record concat_output2 = self.model.Concat(input_record2) self.assertEqual(is_request_only_scalar(concat_output2), False) def testSetRequestOnly(self): input_record = schema.Scalar(np.int64) schema.attach_metadata_to_scalars( input_record, schema.Metadata( categorical_limit=100000000, expected_value=99, feature_specs=schema.FeatureSpec(feature_ids=[1, 100, 1001]))) set_request_only(input_record) self.assertEqual(input_record.metadata.categorical_limit, 100000000) self.assertEqual(input_record.metadata.expected_value, 99) self.assertEqual(input_record.metadata.feature_specs.feature_ids, [1, 100, 1001]) @given( X=hu.arrays(dims=[5, 
5]), # Shape of X is irrelevant ) def testDropout(self, X): input_record = self.new_record(schema.Scalar((np.float32, (1, )))) schema.FeedRecord(input_record, [X]) d_output = self.model.Dropout(input_record) self.assertEqual(schema.Scalar((np.float32, (1, ))), d_output) self.model.output_schema = schema.Struct() train_init_net, train_net = self.get_training_nets() input_blob = input_record.field_blobs()[0] output_blob = d_output.field_blobs()[0] train_d_spec = OpSpec("Dropout", [input_blob], [output_blob, None], { 'is_test': 0, 'ratio': 0.5 }) test_d_spec = OpSpec("Dropout", [input_blob], [output_blob, None], { 'is_test': 1, 'ratio': 0.5 }) self.assertNetContainOps(train_net, [train_d_spec]) eval_net = self.get_eval_net() self.assertNetContainOps(eval_net, [test_d_spec]) predict_net = self.get_predict_net() self.assertNetContainOps(predict_net, [test_d_spec]) workspace.RunNetOnce(train_init_net) workspace.RunNetOnce(train_net) schema.FeedRecord(input_record, [X]) workspace.RunNetOnce(eval_net) schema.FeedRecord(input_record, [X]) workspace.RunNetOnce(predict_net) @given( batch_size=st.integers(min_value=2, max_value=10), input_dims=st.integers(min_value=5, max_value=10), output_dims=st.integers(min_value=5, max_value=10), bandwidth=st.floats(min_value=0.1, max_value=5), ) def testRandomFourierFeatures(self, batch_size, input_dims, output_dims, bandwidth): def _rff_hypothesis_test(rff_output, X, W, b, scale): """ Runs hypothesis test for Semi Random Features layer. 
Inputs: rff_output -- output of net after running random fourier features layer X -- input data W -- weight parameter from train_init_net b -- bias parameter from train_init_net scale -- value by which to scale the output vector """ output = workspace.FetchBlob(rff_output) output_ref = scale * np.cos(np.dot(X, np.transpose(W)) + b) npt.assert_allclose(output, output_ref, rtol=1e-4) X = np.random.random((batch_size, input_dims)).astype(np.float32) scale = np.sqrt(2.0 / output_dims) input_record = self.new_record( schema.Scalar((np.float32, (input_dims, )))) schema.FeedRecord(input_record, [X]) input_blob = input_record.field_blobs()[0] rff_output = self.model.RandomFourierFeatures(input_record, output_dims, bandwidth) self.model.output_schema = schema.Struct() self.assertEqual(schema.Scalar((np.float32, (output_dims, ))), rff_output) train_init_net, train_net = self.get_training_nets() # Init net assertions init_ops_list = [ OpSpec("GaussianFill", None, None), OpSpec("UniformFill", None, None), ] init_ops = self._test_net(train_init_net, init_ops_list) W = workspace.FetchBlob(self.model.layers[0].w) b = workspace.FetchBlob(self.model.layers[0].b) # Operation specifications fc_spec = OpSpec( "FC", [input_blob, init_ops[0].output[0], init_ops[1].output[0]], None) cosine_spec = OpSpec("Cos", None, None) scale_spec = OpSpec("Scale", None, rff_output.field_blobs(), {'scale': scale}) ops_list = [fc_spec, cosine_spec, scale_spec] # Train net assertions self._test_net(train_net, ops_list) _rff_hypothesis_test(rff_output(), X, W, b, scale) # Eval net assertions eval_net = self.get_eval_net() self._test_net(eval_net, ops_list) _rff_hypothesis_test(rff_output(), X, W, b, scale) # Predict net assertions predict_net = self.get_predict_net() self._test_net(predict_net, ops_list) _rff_hypothesis_test(rff_output(), X, W, b, scale) @given(batch_size=st.integers(min_value=2, max_value=10), input_dims=st.integers(min_value=5, max_value=10), output_dims=st.integers(min_value=5, 
max_value=10), s=st.integers(min_value=0, max_value=3), scale=st.floats(min_value=0.1, max_value=5), set_weight_as_global_constant=st.booleans()) def testArcCosineFeatureMap(self, batch_size, input_dims, output_dims, s, scale, set_weight_as_global_constant): def _arc_cosine_hypothesis_test(ac_output, X, W, b, s): """ Runs hypothesis test for Arc Cosine layer. Inputs: ac_output -- output of net after running arc cosine layer X -- input data W -- weight parameter from train_init_net b -- bias parameter from train_init_net s -- degree parameter """ # Get output from net net_output = workspace.FetchBlob(ac_output) # Computing output directly x_rand = np.matmul(X, np.transpose(W)) + b x_pow = np.power(x_rand, s) if s > 0: h_rand_features = np.piecewise(x_rand, [x_rand <= 0, x_rand > 0], [0, 1]) else: h_rand_features = np.piecewise(x_rand, [x_rand <= 0, x_rand > 0], [0, lambda x: x / (1 + x)]) output_ref = np.multiply(x_pow, h_rand_features) # Comparing net output and computed output npt.assert_allclose(net_output, output_ref, rtol=1e-4) X = np.random.normal(size=(batch_size, input_dims)).astype(np.float32) input_record = self.new_record( schema.Scalar((np.float32, (input_dims, )))) schema.FeedRecord(input_record, [X]) input_blob = input_record.field_blobs()[0] ac_output = self.model.ArcCosineFeatureMap( input_record, output_dims, s=s, scale=scale, set_weight_as_global_constant=set_weight_as_global_constant) self.model.output_schema = schema.Struct() self.assertEqual(schema.Scalar((np.float32, (output_dims, ))), ac_output) train_init_net, train_net = self.get_training_nets() # Run create_init_net to initialize the global constants, and W and b workspace.RunNetOnce(train_init_net) workspace.RunNetOnce(self.model.create_init_net(name='init_net')) if set_weight_as_global_constant: W = workspace.FetchBlob( self.model. global_constants['arc_cosine_feature_map_fixed_rand_W']) b = workspace.FetchBlob( self.model. 
global_constants['arc_cosine_feature_map_fixed_rand_b']) else: W = workspace.FetchBlob(self.model.layers[0].random_w) b = workspace.FetchBlob(self.model.layers[0].random_b) # Operation specifications fc_spec = OpSpec("FC", [input_blob, None, None], None) softsign_spec = OpSpec("Softsign", None, None) relu_spec = OpSpec("Relu", None, None) relu_spec_output = OpSpec("Relu", None, ac_output.field_blobs()) pow_spec = OpSpec("Pow", None, None, {'exponent': float(s - 1)}) mul_spec = OpSpec("Mul", None, ac_output.field_blobs()) if s == 0: ops_list = [ fc_spec, softsign_spec, relu_spec_output, ] elif s == 1: ops_list = [ fc_spec, relu_spec_output, ] else: ops_list = [ fc_spec, relu_spec, pow_spec, mul_spec, ] # Train net assertions self._test_net(train_net, ops_list) _arc_cosine_hypothesis_test(ac_output(), X, W, b, s) # Eval net assertions eval_net = self.get_eval_net() self._test_net(eval_net, ops_list) _arc_cosine_hypothesis_test(ac_output(), X, W, b, s) # Predict net assertions predict_net = self.get_predict_net() self._test_net(predict_net, ops_list) _arc_cosine_hypothesis_test(ac_output(), X, W, b, s) @given( batch_size=st.integers(min_value=2, max_value=10), input_dims=st.integers(min_value=5, max_value=10), output_dims=st.integers(min_value=5, max_value=10), s=st.integers(min_value=0, max_value=3), scale=st.floats(min_value=0.1, max_value=5), set_weight_as_global_constant=st.booleans(), use_struct_input=st.booleans(), ) def testSemiRandomFeatures(self, batch_size, input_dims, output_dims, s, scale, set_weight_as_global_constant, use_struct_input): def _semi_random_hypothesis_test(srf_output, X_full, X_random, rand_w, rand_b, s): """ Runs hypothesis test for Semi Random Features layer. 
Inputs: srf_output -- output of net after running semi random features layer X_full -- full input data X_random -- random-output input data rand_w -- random-initialized weight parameter from train_init_net rand_b -- random-initialized bias parameter from train_init_net s -- degree parameter """ # Get output from net net_output = workspace.FetchBlob(srf_output) # Fetch learned parameter blobs learned_w = workspace.FetchBlob(self.model.layers[0].learned_w) learned_b = workspace.FetchBlob(self.model.layers[0].learned_b) # Computing output directly x_rand = np.matmul(X_random, np.transpose(rand_w)) + rand_b x_learn = np.matmul(X_full, np.transpose(learned_w)) + learned_b x_pow = np.power(x_rand, s) if s > 0: h_rand_features = np.piecewise(x_rand, [x_rand <= 0, x_rand > 0], [0, 1]) else: h_rand_features = np.piecewise(x_rand, [x_rand <= 0, x_rand > 0], [0, lambda x: x / (1 + x)]) output_ref = np.multiply(np.multiply(x_pow, h_rand_features), x_learn) # Comparing net output and computed output npt.assert_allclose(net_output, output_ref, rtol=1e-4) X_full = np.random.normal(size=(batch_size, input_dims)).astype(np.float32) if use_struct_input: X_random = np.random.normal(size=(batch_size, input_dims)).\ astype(np.float32) input_data = [X_full, X_random] input_record = self.new_record( schema.Struct( ('full', schema.Scalar((np.float32, (input_dims, )))), ('random', schema.Scalar((np.float32, (input_dims, )))))) else: X_random = X_full input_data = [X_full] input_record = self.new_record( schema.Scalar((np.float32, (input_dims, )))) schema.FeedRecord(input_record, input_data) srf_output = self.model.SemiRandomFeatures( input_record, output_dims, s=s, scale_random=scale, scale_learned=scale, set_weight_as_global_constant=set_weight_as_global_constant) self.model.output_schema = schema.Struct() self.assertEqual( schema.Struct( ('full', schema.Scalar((np.float32, (output_dims, )))), ('random', schema.Scalar((np.float32, (output_dims, ))))), srf_output) init_ops_list = [ 
OpSpec("GaussianFill", None, None), OpSpec("UniformFill", None, None), OpSpec("GaussianFill", None, None), OpSpec("UniformFill", None, None), ] train_init_net, train_net = self.get_training_nets() # Need to run to initialize the global constants for layer workspace.RunNetOnce(self.model.create_init_net(name='init_net')) if set_weight_as_global_constant: # If weight params are global constants, they won't be in train_init_net init_ops = self._test_net(train_init_net, init_ops_list[:2]) rand_w = workspace.FetchBlob( self.model. global_constants['semi_random_features_fixed_rand_W']) rand_b = workspace.FetchBlob( self.model. global_constants['semi_random_features_fixed_rand_b']) # Operation specifications fc_random_spec = OpSpec("FC", [None, None, None], None) fc_learned_spec = OpSpec( "FC", [None, init_ops[0].output[0], init_ops[1].output[0]], None) else: init_ops = self._test_net(train_init_net, init_ops_list) rand_w = workspace.FetchBlob(self.model.layers[0].random_w) rand_b = workspace.FetchBlob(self.model.layers[0].random_b) # Operation specifications fc_random_spec = OpSpec( "FC", [None, init_ops[0].output[0], init_ops[1].output[0]], None) fc_learned_spec = OpSpec( "FC", [None, init_ops[2].output[0], init_ops[3].output[0]], None) softsign_spec = OpSpec("Softsign", None, None) relu_spec = OpSpec("Relu", None, None) relu_output_spec = OpSpec("Relu", None, srf_output.random.field_blobs()) pow_spec = OpSpec("Pow", None, None, {'exponent': float(s - 1)}) mul_interim_spec = OpSpec("Mul", None, srf_output.random.field_blobs()) mul_spec = OpSpec("Mul", None, srf_output.full.field_blobs()) if s == 0: ops_list = [ fc_learned_spec, fc_random_spec, softsign_spec, relu_output_spec, mul_spec, ] elif s == 1: ops_list = [ fc_learned_spec, fc_random_spec, relu_output_spec, mul_spec, ] else: ops_list = [ fc_learned_spec, fc_random_spec, relu_spec, pow_spec, mul_interim_spec, mul_spec, ] # Train net assertions self._test_net(train_net, ops_list) 
_semi_random_hypothesis_test(srf_output.full(), X_full, X_random, rand_w, rand_b, s) # Eval net assertions eval_net = self.get_eval_net() self._test_net(eval_net, ops_list) _semi_random_hypothesis_test(srf_output.full(), X_full, X_random, rand_w, rand_b, s) # Predict net assertions predict_net = self.get_predict_net() self._test_net(predict_net, ops_list) _semi_random_hypothesis_test(srf_output.full(), X_full, X_random, rand_w, rand_b, s) def testConv(self): batch_size = 50 H = 1 W = 10 C = 50 output_dims = 32 kernel_h = 1 kernel_w = 3 stride_h = 1 stride_w = 1 pad_t = 0 pad_b = 0 pad_r = None pad_l = None input_record = self.new_record(schema.Scalar((np.float32, (H, W, C)))) X = np.random.random((batch_size, H, W, C)).astype(np.float32) schema.FeedRecord(input_record, [X]) conv = self.model.Conv(input_record, output_dims, kernel_h=kernel_h, kernel_w=kernel_w, stride_h=stride_h, stride_w=stride_w, pad_t=pad_t, pad_b=pad_b, pad_r=pad_r, pad_l=pad_l, order='NHWC') self.assertEqual(schema.Scalar((np.float32, (output_dims, ))), conv) self.run_train_net_forward_only() output_record = schema.FetchRecord(conv) # check the number of output channels is the same as input in this example assert output_record.field_types()[0].shape == (H, W, output_dims) assert output_record().shape == (batch_size, H, W, output_dims) train_init_net, train_net = self.get_training_nets() # Init net assertions init_ops = self.assertNetContainOps(train_init_net, [ OpSpec("XavierFill", None, None), OpSpec("ConstantFill", None, None), ]) conv_spec = OpSpec("Conv", [ input_record.field_blobs()[0], init_ops[0].output[0], init_ops[1].output[0], ], conv.field_blobs()) # Train net assertions self.assertNetContainOps(train_net, [conv_spec]) # Predict net assertions predict_net = self.get_predict_net() self.assertNetContainOps(predict_net, [conv_spec]) # Eval net assertions eval_net = self.get_eval_net() self.assertNetContainOps(eval_net, [conv_spec])
def create_input(dims):
    """Return a hypothesis array strategy for ``dims`` with the third
    dimension tripled (the remaining dimensions are kept as given)."""
    expanded = list(dims)
    expanded[2] = expanded[2] * 3
    return hu.arrays(expanded)
class TestUtilityOps(serial.SerializedTestCase):
    """Tests for Caffe2 utility operators: Slice, ResizeLike, Transpose,
    NanCheck, elementwise Max/Min (and their gradients), Sum, LengthsGather,
    LengthsToRanges, Size, Alias and Range.

    Each test builds an operator, runs it through the reference/device/gradient
    checkers and compares against a NumPy reference implementation.
    """

    @given(X=hu.tensor(), args=st.booleans(), **hu.gcs)
    @settings(deadline=10000)
    def test_slice(self, X, args, gc, dc):
        """Slice X along one randomly chosen dimension.

        ``args`` toggles between passing starts/ends as operator attributes
        and passing them as input tensors.
        """
        X = X.astype(dtype=np.float32)
        dim = random.randint(0, X.ndim - 1)
        slice_start = random.randint(0, X.shape[dim] - 1)
        slice_end = random.randint(slice_start, X.shape[dim] - 1)
        starts = np.array([0] * X.ndim).astype(np.int32)
        ends = np.array([-1] * X.ndim).astype(np.int32)
        starts[dim] = slice_start
        ends[dim] = slice_end

        if args:
            op = core.CreateOperator(
                "Slice", ["X"], ["Y"],
                starts=starts, ends=ends, device_option=gc
            )

            def slice_ref(X):
                slc = [slice(None)] * X.ndim
                slc[dim] = slice(slice_start, slice_end)
                # NumPy requires a *tuple* of slices for basic indexing;
                # indexing with a list was deprecated and now raises.
                return [X[tuple(slc)]]

            inputs = [X]
        else:
            op = core.CreateOperator(
                "Slice", ["X", "starts", "ends"], ["Y"],
                device_option=gc
            )

            def slice_ref(x, starts, ends):
                slc = [slice(None)] * x.ndim
                slc[dim] = slice(slice_start, slice_end)
                return [x[tuple(slc)]]

            inputs = [X, starts, ends]

        self.assertReferenceChecks(gc, op, inputs, slice_ref)
        self.assertDeviceChecks(dc, op, inputs, [0])
        self.assertGradientChecks(
            device_option=gc,
            op=op,
            inputs=inputs,
            outputs_to_check=0,
            outputs_with_grads=[0],
        )

    @given(ndims=st.integers(min_value=1, max_value=10), **hu.gcs)
    @settings(deadline=10000)
    def test_resize_like(self, ndims, gc, dc):
        """ResizeLike reshapes X to Y's shape (element count must match)."""
        X = np.zeros((ndims * 2, ))
        Y = np.zeros((ndims, 2))
        op = core.CreateOperator(
            "ResizeLike", ["X", "Y"], ["Z"],
        )

        def resize_like(X, Y):
            return [X.reshape(Y.shape)]

        self.assertDeviceChecks(dc, op, [X, Y], [0])
        self.assertReferenceChecks(gc, op, [X, Y], resize_like,
                                   ensure_outputs_are_inferred=True)

    @given(dtype=st.sampled_from([np.float32, np.int32]),
           ndims=st.integers(min_value=1, max_value=5),
           seed=st.integers(min_value=0, max_value=65536),
           null_axes=st.booleans(),
           engine=st.sampled_from(['CUDNN', None]),
           **hu.gcs)
    @settings(deadline=10000)
    def test_transpose(self, dtype, ndims, seed, null_axes, engine, gc, dc):
        """Transpose with either an explicit random permutation or default
        (reversed) axes when ``null_axes`` is set."""
        if (gc.device_type == caffe2_pb2.CUDA and engine == "CUDNN"):
            # cudnn 5.1 does not support int.
            assume(workspace.GetCuDNNVersion() >= 6000 or dtype != np.int32)

        dims = (np.random.rand(ndims) * 16 + 1).astype(np.int32)
        X = (np.random.rand(*dims) * 16).astype(dtype)

        if null_axes:
            axes = None
            op = core.CreateOperator(
                "Transpose", ["input"], ["output"], engine=engine)
        else:
            np.random.seed(int(seed))
            axes = [int(v) for v in list(np.random.permutation(X.ndim))]
            op = core.CreateOperator(
                "Transpose", ["input"], ["output"], axes=axes, engine=engine)

        def transpose_ref(x, axes):
            return (np.transpose(x, axes),)

        self.assertReferenceChecks(gc, op, [X, axes], transpose_ref)

    @given(m=st.integers(5, 10), n=st.integers(5, 10),
           o=st.integers(5, 10), nans=st.booleans(), **hu.gcs)
    @settings(deadline=10000)
    def test_nan_check(self, m, n, o, nans, gc, dc):
        """NanCheck must fail (raise) iff the input contains a NaN, for both
        the forward pass and the gradient pass."""
        other = np.array([1, 2, 3]).astype(np.float32)
        X = np.random.rand(m, n, o).astype(np.float32)
        if nans:
            # Poison a single random element.
            x_nan = np.random.randint(0, m)
            y_nan = np.random.randint(0, n)
            z_nan = np.random.randint(0, o)
            X[x_nan, y_nan, z_nan] = float('NaN')

        def nan_reference(X, Y):
            if not np.isnan(X).any():
                return [X]
            else:
                return [np.array([])]

        op = core.CreateOperator(
            "NanCheck",
            ["X", "other"],
            ["Y"]
        )

        try:
            self.assertReferenceChecks(
                device_option=gc,
                op=op,
                inputs=[X, other],
                reference=nan_reference,
            )
            if nans:
                self.assertTrue(False, "Did not fail when presented with NaN!")
        except RuntimeError:
            self.assertTrue(nans, "No NaNs but failed")

        try:
            self.assertGradientChecks(
                device_option=gc,
                op=op,
                inputs=[X],
                outputs_to_check=0,
                outputs_with_grads=[0],
            )
            if nans:
                self.assertTrue(False, "Did not fail when gradient had NaN!")
        except RuntimeError:
            pass

    @serial.given(n=st.integers(4, 5), m=st.integers(6, 7),
                  d=st.integers(2, 3), **hu.gcs)
    def test_elementwise_max(self, n, m, d, gc, dc):
        """Elementwise Max over three inputs."""
        X = np.random.rand(n, m, d).astype(np.float32)
        Y = np.random.rand(n, m, d).astype(np.float32)
        Z = np.random.rand(n, m, d).astype(np.float32)
        inputs = [X, Y, Z]

        def max_op(X, Y, Z):
            return [np.maximum(np.maximum(X, Y), Z)]

        op = core.CreateOperator(
            "Max",
            ["X", "Y", "Z"],
            ["mx"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=inputs,
            reference=max_op,
        )
        self.assertDeviceChecks(dc, op, inputs, [0])

    @given(n=st.integers(4, 5), m=st.integers(6, 7),
           d=st.integers(2, 3), **hu.gcs)
    @settings(deadline=10000)
    def test_elementwise_max_grad(self, n, m, d, gc, dc):
        """MaxGradient routes the output gradient to the arg(s) equal to the
        max."""
        go = np.random.rand(n, m, d).astype(np.float32)
        X = np.random.rand(n, m, d).astype(np.float32)
        Y = np.random.rand(n, m, d).astype(np.float32)
        Z = np.random.rand(n, m, d).astype(np.float32)
        mx = np.maximum(np.maximum(X, Y), Z)
        inputs = [mx, go, X, Y, Z]

        def max_grad_op(mx, go, X, Y, Z):
            def mx_grad(a):
                return go * (mx == a)

            return [mx_grad(a) for a in [X, Y, Z]]

        op = core.CreateOperator(
            "MaxGradient",
            ["mx", "go", "X", "Y", "Z"],
            ["gX", "gY", "gZ"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=inputs,
            reference=max_grad_op,
        )
        self.assertDeviceChecks(dc, op, inputs, [0, 1, 2])

    @serial.given(n=st.integers(4, 5), m=st.integers(6, 7),
                  d=st.integers(2, 3), **hu.gcs)
    def test_elementwise_min(self, n, m, d, gc, dc):
        """Elementwise Min over three inputs."""
        X = np.random.rand(n, m, d).astype(np.float32)
        Y = np.random.rand(n, m, d).astype(np.float32)
        Z = np.random.rand(n, m, d).astype(np.float32)
        inputs = [X, Y, Z]

        def min_op(X, Y, Z):
            return [np.minimum(np.minimum(X, Y), Z)]

        op = core.CreateOperator(
            "Min",
            ["X", "Y", "Z"],
            ["mx"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=inputs,
            reference=min_op,
        )
        self.assertDeviceChecks(dc, op, inputs, [0])

    @given(n=st.integers(4, 5), m=st.integers(6, 7),
           d=st.integers(2, 3), **hu.gcs)
    @settings(deadline=10000)
    def test_elementwise_min_grad(self, n, m, d, gc, dc):
        """MinGradient routes the output gradient to the arg(s) equal to the
        min."""
        go = np.random.rand(n, m, d).astype(np.float32)
        X = np.random.rand(n, m, d).astype(np.float32)
        Y = np.random.rand(n, m, d).astype(np.float32)
        Z = np.random.rand(n, m, d).astype(np.float32)
        mx = np.minimum(np.minimum(X, Y), Z)
        inputs = [mx, go, X, Y, Z]

        def min_grad_op(mx, go, X, Y, Z):
            def mx_grad(a):
                return go * (mx == a)

            return [mx_grad(a) for a in [X, Y, Z]]

        op = core.CreateOperator(
            "MinGradient",
            ["mx", "go", "X", "Y", "Z"],
            ["gX", "gY", "gZ"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=inputs,
            reference=min_grad_op,
        )
        self.assertDeviceChecks(dc, op, inputs, [0, 1, 2])

    @given(
        n=st.integers(1, 8), m=st.integers(1, 10), d=st.integers(1, 4),
        in_place=st.booleans(), engine=st.sampled_from(["", "CUDNN"]),
        seed=st.integers(min_value=0, max_value=65535),
        dtype=st.sampled_from([np.int32, np.int64, np.float32]),
        **hu.gcs)
    @settings(deadline=10000)
    def test_sum(
            self, n, m, d, in_place, engine, seed, dtype, gc, dc):
        """Sum of m tensors, optionally writing in place into the first
        input."""
        input_names = []
        input_vars = []
        np.random.seed(seed)
        for i in range(m):
            X_name = 'X' + str(i)
            input_names.append(X_name)
            # NOTE(review): the original also did ``vars()[X_name] = var`` —
            # writing to vars() inside a function is a documented no-op, so
            # it has been dropped; the tensors live in input_vars.
            input_vars.append(np.random.rand(n, d).astype(dtype))

        def sum_op_ref(*args):
            res = np.zeros((n, d))
            for i in range(m):
                res = res + args[i]
            return (res, )

        op = core.CreateOperator(
            "Sum",
            input_names,
            [input_names[0]] if in_place else ['Y'],
            engine=engine,
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=input_vars,
            reference=sum_op_ref,
        )
        self.assertDeviceChecks(dc, op, input_vars, [0])

    @given(
        inputs=hu.lengths_tensor().flatmap(
            lambda pair: st.tuples(
                st.just(pair[0]),
                st.just(pair[1]),
                hu.dims(max_value=len(pair[1])),
            )
        ).flatmap(
            lambda tup: st.tuples(
                st.just(tup[0]),
                st.just(tup[1]),
                hu.arrays(
                    tup[2], dtype=np.int32,
                    elements=st.integers(
                        min_value=0, max_value=len(tup[1]) - 1)),
            )
        ),
        **hu.gcs_cpu_only)
    @settings(deadline=1000)
    def test_lengths_gather(self, inputs, gc, dc):
        """LengthsGather concatenates the length-delimited segments selected
        by ``indices``."""
        items = inputs[0]
        lengths = inputs[1]
        indices = inputs[2]

        def lengths_gather_op(items, lengths, indices):
            ends = np.cumsum(lengths)
            return [np.concatenate(
                list(items[ends[i] - lengths[i]:ends[i]] for i in indices))]

        op = core.CreateOperator(
            "LengthsGather",
            ["items", "lengths", "indices"],
            ["output"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[items, lengths, indices],
            reference=lengths_gather_op,
        )

    @given(
        inputs=hu.lengths_tensor(),
        **hu.gcs_cpu_only)
    @settings(deadline=1000)
    def test_lengths_to_ranges(self, inputs, gc, dc):
        """LengthsToRanges turns a lengths vector into [offset, length]
        pairs; also exercises shape/type inference."""
        _, lengths = inputs

        def lengths_to_ranges_op(lengths):
            return [
                [[x, y] for x, y in zip(np.cumsum(np.append([0], lengths)),
                                        lengths)]
            ]

        op = core.CreateOperator(
            "LengthsToRanges",
            ["lengths"],
            ["output"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[lengths],
            reference=lengths_to_ranges_op,
        )

        # Test shape inference logic
        net = core.Net("test_shape_inference")
        workspace.FeedBlob("lengths", lengths)
        output = net.LengthsToRanges(
            ["lengths"],
            ["output"]
        )
        (shapes, types) = workspace.InferShapesAndTypes([net])
        workspace.RunNetOnce(net)
        self.assertEqual(shapes[output], list(workspace.blobs[output].shape))
        self.assertEqual(shapes[output], list(lengths.shape) + [2])
        self.assertEqual(types[output], core.DataType.INT32)

    @given(**hu.gcs)
    @settings(deadline=None, max_examples=50)
    def test_size_op(self, gc, dc):
        """Size returns the total number of elements of the input tensor."""
        X = np.array([[1, 2], [3, 4]]).astype(np.float32)

        def size_op(tensor):
            return [np.prod(tensor.shape)]

        op = core.CreateOperator(
            "Size",
            ["X"],
            ["output"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[X],
            reference=size_op,
        )

    def test_alias_op(self):
        """ Don't use hypothesis because there are only 2 cases to check"""
        for size in [0, 5]:
            X = np.arange(size).astype(np.float32)
            workspace.FeedBlob('X', X)

            op = core.CreateOperator(
                "Alias",
                ["X"],
                ["Y"]
            )
            workspace.RunOperatorOnce(op)
            Y = workspace.FetchBlob('Y')
            np.testing.assert_array_equal(X, Y)

    @given(**hu.gcs)
    @settings(deadline=10000)
    def test_range(self, gc, dc):
        """Range with 1/2/3 inputs (stop | start,stop | start,stop,step);
        a zero step must raise."""
        names = [
            ('stop_',),
            ('start_', 'stop_'),
            ('start_', 'stop_', 'step_'),
        ]
        # Most random values aren't great here, so use a fixed set instead of
        # hypothesis.
        for inputs in (
            (10,),
            (np.float32(10.0),),
            (0,),
            (0, 0),
            (10., 5.0, -1.),
            (2, 10000),
            (2, 10000, 20000),
            (2, 10000, -1),
        ):
            inputs = [np.array(v) for v in inputs]
            op = core.CreateOperator(
                "Range",
                names[len(inputs) - 1],
                ["Y"]
            )

            self.assertReferenceChecks(
                device_option=gc,
                op=op,
                inputs=inputs,
                reference=lambda *x: [np.arange(*x)],
            )
            self.assertDeviceChecks(dc, op, inputs, [0])

        inputs = (np.array(0), np.array(10), np.array(0))
        op = core.CreateOperator(
            "Range",
            names[len(inputs) - 1],
            ["Y"]
        )
        # unittest's own assertRaisesRegex (Python 3) replaces the old
        # six.assertRaisesRegex compatibility shim.
        with self.assertRaisesRegex(RuntimeError, 'Step size cannot be 0'):
            self.assertReferenceChecks(
                device_option=gc,
                op=op,
                inputs=inputs,
                reference=lambda *x: [np.arange(*x)],
            )
class TestPairWiseLossOps(hu.HypothesisTestCase):
    """Tests for PairWiseLoss / PairWiseLossGradient on two-row inputs.

    ``np.asscalar`` (removed in NumPy 1.23) has been replaced with
    ``ndarray.item()`` throughout.
    """

    @given(X=hu.arrays(dims=[2, 1],
                       elements=st.floats(min_value=0.0, max_value=10.0)),
           label=hu.arrays(dims=[2, 1],
                           elements=st.integers(min_value=0, max_value=1),
                           dtype=np.float32),
           **hu.gcs_cpu_only)
    def test_pair_wise_loss_predictions(self, X, label, gc, dc):
        """Loss equals log(1 + exp(sign * (X1 - X0))) and is invariant to
        swapping the row order."""
        workspace.FeedBlob('X', X)
        workspace.FeedBlob('label', label)
        new_label = np.array([label[1], label[0]])
        new_x = np.array([X[1], X[0]])
        workspace.FeedBlob('new_x', new_x)
        workspace.FeedBlob('new_label', new_label)
        net = core.Net('net')
        net.PairWiseLoss(['X', 'label'], ['output'])
        net.PairWiseLoss(['new_x', 'new_label'], ['new_output'])
        plan = core.Plan('predict_data')
        plan.AddStep(core.execution_step('predict_data', [net], num_iter=1))
        workspace.RunPlan(plan)
        output = workspace.FetchBlob('output')
        new_output = workspace.FetchBlob('new_output')
        sign = 1 if label[0] > label[1] else -1
        if label[0] == label[1]:
            # Equal labels -> no preference -> zero loss.
            self.assertEqual(output.item(), 0)
            return

        self.assertAlmostEqual(
            output.item(),
            np.log(1 + np.exp(sign * (X[1] - X[0]))).item(),
            delta=1e-4)

        # check swapping row order doesn't alter overall loss
        self.assertAlmostEqual(output.item(), new_output.item())

    @given(X=hu.arrays(dims=[2, 1],
                       elements=st.floats(min_value=0.0, max_value=10.0)),
           label=hu.arrays(dims=[2, 1],
                           elements=st.integers(min_value=0, max_value=1),
                           dtype=np.float32),
           dY=hu.arrays(dims=[1],
                        elements=st.floats(min_value=1, max_value=10)),
           **hu.gcs_cpu_only)
    def test_pair_wise_loss_gradient(self, X, label, dY, gc, dc):
        """Analytic gradient matches the closed form and a central
        finite-difference estimate; dX rows are antisymmetric."""
        workspace.FeedBlob('X', X)
        workspace.FeedBlob('dY', dY)
        workspace.FeedBlob('label', label)
        net = core.Net('net')
        net.PairWiseLossGradient(
            ['X', 'label', 'dY'],
            ['dX'],
        )
        plan = core.Plan('predict_data')
        plan.AddStep(core.execution_step('predict_data', [net], num_iter=1))
        workspace.RunPlan(plan)
        dx = workspace.FetchBlob('dX')
        sign = 1 if label[0] > label[1] else -1
        if label[0] == label[1]:
            self.assertEqual(dx[0].item(), 0)
            return
        self.assertAlmostEqual(
            dx[0].item(),
            (-dY[0] * sign / (1 + np.exp(sign * (X[0] - X[1])))).item(),
            delta=1e-2 * abs(dx[0].item()))

        # Gradient of the pair is antisymmetric.
        self.assertEqual(dx[0].item(), (-dx[1]).item())

        delta = 1e-3
        up_x = np.array([[X[0] + delta], [X[1]]], dtype=np.float32)
        down_x = np.array([[X[0] - delta], [X[1]]], dtype=np.float32)
        workspace.FeedBlob('up_x', up_x)
        workspace.FeedBlob('down_x', down_x)
        new_net = core.Net('new_net')
        new_net.PairWiseLoss(['up_x', 'label'], ['up_output'])
        new_net.PairWiseLoss(['down_x', 'label'], ['down_output'])

        plan = core.Plan('predict_data')
        plan.AddStep(core.execution_step('predict_data', [new_net],
                                         num_iter=1))
        workspace.RunPlan(plan)
        down_output_pred = workspace.FetchBlob('down_output')
        up_output_pred = workspace.FetchBlob('up_output')
        # Central finite-difference check of the analytic gradient.
        self.assertAlmostEqual(
            dx[0].item(),
            (0.5 * dY[0] *
             (up_output_pred[0] - down_output_pred[0]) / delta).item(),
            delta=abs(dx[0].item() * 1e-2))
class TestRegularizer(LayersTestCase):
    """Tests for the caffe2 ``regularizer`` module (LogBarrier,
    BoundedGradientProjection, GroupL1Norm, L1NormTrimmed,
    ElasticNetL1NormTrimmed, MaxNorm).

    NOTE(review): a second, smaller ``TestRegularizer`` class is defined later
    in this module and shadows this one at import time, so the group-l1 /
    trimmed-norm / max-norm tests here never run — confirm intent.
    NOTE(review): this copy draws floats via ``hu.floats`` while the later
    duplicate uses ``st.floats``; verify that ``hu.floats`` exists in the
    test-util module in use.
    """

    @given(X=hu.arrays(dims=[2, 5],
                       elements=hu.floats(min_value=-1.0, max_value=1.0)))
    def test_log_barrier(self, X):
        param = core.BlobReference("X")
        workspace.FeedBlob(param, X)
        train_init_net, train_net = self.get_training_nets()
        reg = regularizer.LogBarrier(1.0)
        # ON_LOSS produces the scalar regularization output; AFTER_OPTIMIZER
        # applies the in-place projection to the parameter blob.
        output = reg(train_net, train_init_net, param, by=RegularizationBy.ON_LOSS)
        reg(
            train_net,
            train_init_net,
            param,
            grad=None,
            by=RegularizationBy.AFTER_OPTIMIZER,
        )
        workspace.RunNetOnce(train_init_net)
        workspace.RunNetOnce(train_net)

        def ref(X):
            # Expected (loss, projected-param) pair; clipping at 1e-9 mirrors
            # the operator's guard against log(<=0).
            return (
                np.array(np.sum(-np.log(np.clip(X, 1e-9, None))) * 0.5).astype(
                    np.float32
                ),
                np.clip(X, 1e-9, None),
            )

        for x, y in zip(workspace.FetchBlobs([output, param]), ref(X)):
            npt.assert_allclose(x, y, rtol=1e-3)

    @given(
        X=hu.arrays(dims=[2, 5],
                    elements=hu.floats(min_value=-1.0, max_value=1.0)),
        left_open=st.booleans(),
        right_open=st.booleans(),
        eps=hu.floats(min_value=1e-6, max_value=1e-4),
        ub=hu.floats(min_value=-1.0, max_value=1.0),
        lb=hu.floats(min_value=-1.0, max_value=1.0),
        **hu.gcs_cpu_only
    )
    def test_bounded_grad_proj(self, X, left_open, right_open, eps, ub, lb, gc, dc):
        # Skip draws where the (possibly open) interval is empty.
        if ub - (eps if right_open else 0.) < lb + (eps if left_open else 0.):
            return
        param = core.BlobReference("X")
        workspace.FeedBlob(param, X)
        train_init_net, train_net = self.get_training_nets()
        reg = regularizer.BoundedGradientProjection(
            lb=lb, ub=ub, left_open=left_open, right_open=right_open, epsilon=eps
        )
        output = reg(train_net, train_init_net, param, by=RegularizationBy.ON_LOSS)
        reg(
            train_net,
            train_init_net,
            param,
            grad=None,
            by=RegularizationBy.AFTER_OPTIMIZER,
        )
        workspace.RunNetOnce(train_init_net)
        workspace.RunNetOnce(train_net)

        def ref(X):
            # Open ends are tightened by eps before clipping.
            return np.clip(
                X, lb + (eps if left_open else 0.), ub - (eps if right_open else 0.)
            )

        # Projection contributes no loss term.
        assert output is None
        npt.assert_allclose(workspace.blobs[param], ref(X), atol=1e-7)

    @given(
        output_dim=st.integers(1, 10),
        input_num=st.integers(3, 30),
        reg_weight=st.integers(0, 10)
    )
    def test_group_l1_norm(self, output_dim, input_num, reg_weight):
        """
        1. create a weight blob
        2. create random group splits
        3. run group_l1_nrom with the weight blob
        4. run equivalent np operations to calculate group l1 norm
        5. compare if the results from 3 and 4 are equal
        """
        def compare_reference(weight, group_boundaries, reg_lambda, output):
            # Split columns at the interior boundaries; each split is a group.
            group_splits = np.hsplit(weight, group_boundaries[1:-1])
            l2_reg = np.sqrt([np.sum(np.square(g)) for g in group_splits])
            # Each group's L2 norm is scaled by sqrt(group size).
            l2_normalized = np.multiply(
                l2_reg,
                np.array([np.sqrt(g.shape[1]) for g in group_splits]))
            result = np.multiply(np.sum(l2_normalized), reg_lambda)
            npt.assert_almost_equal(result, workspace.blobs[output], decimal=2)

        weight = np.random.rand(output_dim, input_num).astype(np.float32)

        # Random strictly-increasing boundaries 0 < b_1 < ... < input_num.
        feature_num = np.random.randint(low=1, high=input_num - 1)
        group_boundaries = [0]
        group_boundaries = np.append(
            group_boundaries,
            np.sort(
                np.random.choice(range(1, input_num - 1), feature_num, replace=False)
            ),
        )
        group_boundaries = np.append(group_boundaries, [input_num])
        split_info = np.diff(group_boundaries)

        weight_blob = core.BlobReference("weight_blob")
        workspace.FeedBlob(weight_blob, weight)
        train_init_net, train_net = self.get_training_nets()
        reg = regularizer.GroupL1Norm(reg_weight * 0.1, split_info.tolist())
        output = reg(
            train_net, train_init_net, weight_blob, by=RegularizationBy.ON_LOSS
        )

        workspace.RunNetOnce(train_init_net)
        workspace.RunNetOnce(train_net)

        compare_reference(weight, group_boundaries, reg_weight * 0.1, output)

    @given(
        param_dim=st.integers(10, 30),
        k=st.integers(5, 9),
        reg_weight=st.integers(0, 10)
    )
    def test_l1_norm_trimmed(self, param_dim, k, reg_weight):
        """L1NormTrimmed: L1 norm after dropping the k largest magnitudes."""
        weight = np.random.rand(param_dim).astype(np.float32)
        weight_blob = core.BlobReference("weight_blob")
        workspace.FeedBlob(weight_blob, weight)
        train_init_net, train_net = self.get_training_nets()
        reg = regularizer.L1NormTrimmed(reg_weight * 0.1, k)
        output = reg(
            train_net, train_init_net, weight_blob, by=RegularizationBy.ON_LOSS
        )

        workspace.RunNetOnce(train_init_net)
        workspace.RunNetOnce(train_net)

        # Sum of the (param_dim - k) smallest absolute values.
        result = np.sum(np.sort(np.absolute(weight))[:(param_dim - k)]) * reg_weight * 0.1
        npt.assert_almost_equal(result, workspace.blobs[output], decimal=2)

    @given(
        param_dim=st.integers(10, 30),
        k=st.integers(5, 9),
        l1=st.integers(0, 10),
        l2=st.integers(0, 10)
    )
    def test_elastic_l1_norm_trimmed(self, param_dim, k, l1, l2):
        """ElasticNetL1NormTrimmed: trimmed-L1 plus full-L2 penalty."""
        weight = np.random.rand(param_dim).astype(np.float32)
        weight_blob = core.BlobReference("weight_blob")
        workspace.FeedBlob(weight_blob, weight)
        train_init_net, train_net = self.get_training_nets()
        reg = regularizer.ElasticNetL1NormTrimmed(l1 * 0.1, l2 * 0.1, k)
        output = reg(
            train_net, train_init_net, weight_blob, by=RegularizationBy.ON_LOSS
        )

        workspace.RunNetOnce(train_init_net)
        workspace.RunNetOnce(train_net)

        l1_norm = np.sum(np.sort(np.absolute(weight))[:(param_dim - k)])
        l2_norm = np.sum(np.square(weight))
        result = l1_norm * l1 * 0.1 + l2_norm * l2 * 0.1
        npt.assert_almost_equal(result, workspace.blobs[output], decimal=2)

    @given(
        row_dim=st.integers(5, 10),
        norm=st.floats(min_value=1.0, max_value=4.0),
        data_strategy=st.data(),
    )
    def test_fp16_max_norm(self, row_dim, norm, data_strategy):
        """MaxNorm (fp16): rows touched by a sparse gradient are rescaled so
        their L2 norm does not exceed ``norm``."""
        weight = np.random.rand(row_dim, 5).astype(np.float16)
        grad = np.random.rand(row_dim, 5).astype(np.float16)

        # generate indices that will be updated
        indices = data_strategy.draw(
            hu.tensor(
                dtype=np.int64,
                min_dim=1,
                max_dim=1,
                elements=st.sampled_from(np.arange(weight.shape[0])),
            )
        )
        indices = np.unique(indices)

        # compute expected result
        result = weight.copy()
        # prevent dived by zero
        eps = 1e-12
        norms = np.sqrt(np.sum(result[indices, ] ** 2, axis=1, keepdims=True))
        # if the norms are smaller than max_norm, then it doesn't need update
        desired = np.clip(norms, 0, norm)
        # apply max norm
        result[indices, ] *= desired / (eps + norms)

        weight_blob = core.BlobReference("weight_blob")
        workspace.FeedBlob(weight_blob, weight)
        grad_blob = core.BlobReference("grad_blob")
        workspace.FeedBlob(grad_blob, grad)
        indices_blob = core.BlobReference("indices")
        workspace.FeedBlob(indices_blob, indices)
        grad_blob_slice = core.GradientSlice(indices=indices_blob, values=grad_blob)
        train_init_net, train_net = self.get_training_nets()
        reg = regularizer.MaxNorm(norm, dtype='fp16')
        reg(
            train_net, train_init_net, weight_blob, grad_blob_slice,
            by=RegularizationBy.AFTER_OPTIMIZER
        )
        workspace.RunNetOnce(train_init_net)
        workspace.RunNetOnce(train_net)

        npt.assert_almost_equal(result, workspace.FetchBlob('weight_blob'), decimal=2)
class TestUtilityOps(hu.HypothesisTestCase):
    """Tests for assorted utility operators: Slice, Transpose, NanCheck,
    elementwise Max/Min (+gradients), LengthsGather, Size, Alias, Range."""

    @given(X=hu.tensor(), args=st.booleans(), **hu.gcs)
    def test_slice(self, X, args, gc, dc):
        """Slice along one random dim, passing starts/ends either as operator
        arguments (args=True) or as input blobs (args=False)."""
        X = X.astype(dtype=np.float32)
        dim = random.randint(0, X.ndim - 1)
        slice_start = random.randint(0, X.shape[dim] - 1)
        slice_end = random.randint(slice_start, X.shape[dim] - 1)
        starts = np.array([0] * X.ndim).astype(np.int32)
        ends = np.array([-1] * X.ndim).astype(np.int32)
        starts[dim] = slice_start
        ends[dim] = slice_end

        if args:
            op = core.CreateOperator(
                "Slice", ["X"], ["Y"], starts=starts, ends=ends,
                device_option=gc)

            def slice_ref(X):
                slc = [slice(None)] * X.ndim
                slc[dim] = slice(slice_start, slice_end)
                return [X[slc]]
            inputs = [X]
        else:
            op = core.CreateOperator(
                "Slice", ["X", "starts", "ends"], ["Y"], device_option=gc)

            def slice_ref(x, starts, ends):
                slc = [slice(None)] * x.ndim
                slc[dim] = slice(slice_start, slice_end)
                return [x[slc]]
            inputs = [X, starts, ends]

        self.assertReferenceChecks(gc, op, inputs, slice_ref)
        self.assertDeviceChecks(dc, op, inputs, [0])
        self.assertGradientChecks(
            device_option=gc,
            op=op,
            inputs=inputs,
            outputs_to_check=0,
            outputs_with_grads=[0],
        )

    @given(dtype=st.sampled_from([np.float32, np.int32]),
           ndims=st.integers(min_value=1, max_value=5),
           seed=st.integers(min_value=0, max_value=65536),
           null_axes=st.booleans(),
           engine=st.sampled_from(['CUDNN', None]),
           **hu.gcs)
    def test_transpose(self, dtype, ndims, seed, null_axes, engine, gc, dc):
        """Transpose with explicit random axes or with axes omitted
        (null_axes=True -> full reversal, matching np.transpose default)."""
        dims = (np.random.rand(ndims) * 16 + 1).astype(np.int32)
        X = (np.random.rand(*dims) * 16).astype(dtype)

        if null_axes:
            axes = None
            op = core.CreateOperator(
                "Transpose",
                ["input"], ["output"],
                engine=engine)
        else:
            np.random.seed(int(seed))
            axes = [int(v) for v in list(np.random.permutation(X.ndim))]
            op = core.CreateOperator(
                "Transpose",
                ["input"], ["output"],
                axes=axes,
                engine=engine)

        def transpose_ref(x, axes):
            return (np.transpose(x, axes), )

        self.assertReferenceChecks(gc, op, [X, axes], transpose_ref)

    @unittest.skipIf(not workspace.has_gpu_support, "No gpu support")
    def test_gpu_transpose_minusones(self):
        '''
        Repro a problem with earlier version of CuDNN Transpose Op that
        casted ints to floats.
        '''
        X = -np.ones((2, 10)).astype(np.int32)
        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):
            workspace.FeedBlob("X", X)
            print("X:\n{}\n".format(workspace.FetchBlob("X")))

            op = core.CreateOperator(
                "Transpose",
                ["X"], ["Y"],
                engine='CUDNN')
            workspace.RunOperatorOnce(op)

            Y = workspace.FetchBlob("Y")
            print("Y:\n{}\n".format(Y))

            # Every element must still be the int -1 after the transpose.
            for j in list(Y.flatten()):
                self.assertEqual(-1, j)

    @given(m=st.integers(5, 10), n=st.integers(5, 10),
           o=st.integers(5, 10), nans=st.booleans(), **hu.gcs)
    def test_nan_check(self, m, n, o, nans, gc, dc):
        """NanCheck must raise when (and only when) the input contains NaN."""
        other = np.array([1, 2, 3]).astype(np.float32)
        X = np.random.rand(m, n, o).astype(np.float32)
        if nans:
            # Poison one random element.
            x_nan = np.random.randint(0, m)
            y_nan = np.random.randint(0, n)
            z_nan = np.random.randint(0, o)
            X[x_nan, y_nan, z_nan] = float('NaN')

        # print('nans: {}'.format(nans))
        # print(X)

        def nan_reference(X, Y):
            if not np.isnan(X).any():
                return [X]
            else:
                # Unreachable on success: the op raises before comparing.
                return [np.array([])]

        op = core.CreateOperator(
            "NanCheck",
            ["X", "other"],
            ["Y"]
        )

        try:
            self.assertReferenceChecks(
                device_option=gc,
                op=op,
                inputs=[X, other],
                reference=nan_reference,
            )
            if nans:
                self.assertTrue(False, "Did not fail when presented with NaN!")
        except RuntimeError:
            self.assertTrue(nans, "No NaNs but failed")

        try:
            self.assertGradientChecks(
                device_option=gc,
                op=op,
                inputs=[X],
                outputs_to_check=0,
                outputs_with_grads=[0],
            )
            if nans:
                self.assertTrue(False, "Did not fail when gradient had NaN!")
        except RuntimeError:
            # Expected failure path when NaNs were injected.
            pass

    @given(n=st.integers(4, 5), m=st.integers(6, 7),
           d=st.integers(2, 3), **hu.gcs)
    def test_elementwise_max(self, n, m, d, gc, dc):
        X = np.random.rand(n, m, d).astype(np.float32)
        Y = np.random.rand(n, m, d).astype(np.float32)
        Z = np.random.rand(n, m, d).astype(np.float32)
        inputs = [X, Y, Z]

        def max_op(X, Y, Z):
            return [np.maximum(np.maximum(X, Y), Z)]

        op = core.CreateOperator(
            "Max",
            ["X", "Y", "Z"],
            ["mx"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=inputs,
            reference=max_op,
        )
        self.assertDeviceChecks(dc, op, inputs, [0])

    @given(n=st.integers(4, 5), m=st.integers(6, 7),
           d=st.integers(2, 3), **hu.gcs)
    def test_elementwise_max_grad(self, n, m, d, gc, dc):
        go = np.random.rand(n, m, d).astype(np.float32)
        X = np.random.rand(n, m, d).astype(np.float32)
        Y = np.random.rand(n, m, d).astype(np.float32)
        Z = np.random.rand(n, m, d).astype(np.float32)
        mx = np.maximum(np.maximum(X, Y), Z)
        inputs = [mx, go, X, Y, Z]

        def max_grad_op(mx, go, X, Y, Z):
            def mx_grad(a):
                # Gradient flows only to positions where `a` was the max.
                return go * (mx == a)

            return [mx_grad(a) for a in [X, Y, Z]]

        op = core.CreateOperator(
            "MaxGradient",
            ["mx", "go", "X", "Y", "Z"],
            ["gX", "gY", "gZ"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=inputs,
            reference=max_grad_op,
        )
        self.assertDeviceChecks(dc, op, inputs, [0, 1, 2])

    @given(n=st.integers(4, 5), m=st.integers(6, 7),
           d=st.integers(2, 3), **hu.gcs)
    def test_elementwise_min(self, n, m, d, gc, dc):
        X = np.random.rand(n, m, d).astype(np.float32)
        Y = np.random.rand(n, m, d).astype(np.float32)
        Z = np.random.rand(n, m, d).astype(np.float32)
        inputs = [X, Y, Z]

        def min_op(X, Y, Z):
            return [np.minimum(np.minimum(X, Y), Z)]

        op = core.CreateOperator(
            "Min",
            ["X", "Y", "Z"],
            ["mx"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=inputs,
            reference=min_op,
        )
        self.assertDeviceChecks(dc, op, inputs, [0])

    @given(n=st.integers(4, 5), m=st.integers(6, 7),
           d=st.integers(2, 3), **hu.gcs)
    def test_elementwise_min_grad(self, n, m, d, gc, dc):
        go = np.random.rand(n, m, d).astype(np.float32)
        X = np.random.rand(n, m, d).astype(np.float32)
        Y = np.random.rand(n, m, d).astype(np.float32)
        Z = np.random.rand(n, m, d).astype(np.float32)
        mx = np.minimum(np.minimum(X, Y), Z)
        inputs = [mx, go, X, Y, Z]

        def min_grad_op(mx, go, X, Y, Z):
            def mx_grad(a):
                # Gradient flows only to positions where `a` was the min.
                return go * (mx == a)

            return [mx_grad(a) for a in [X, Y, Z]]

        op = core.CreateOperator(
            "MinGradient",
            ["mx", "go", "X", "Y", "Z"],
            ["gX", "gY", "gZ"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=inputs,
            reference=min_grad_op,
        )
        self.assertDeviceChecks(dc, op, inputs, [0, 1, 2])

    @given(inputs=hu.lengths_tensor().flatmap(
        lambda pair: st.tuples(
            st.just(pair[0]),
            st.just(pair[1]),
            hu.dims(max_value=len(pair[1])),
        )
    ).flatmap(
        lambda tup: st.tuples(
            st.just(tup[0]),
            st.just(tup[1]),
            hu.arrays(
                tup[2], dtype=np.int32,
                elements=st.integers(
                    min_value=0, max_value=len(tup[1]) - 1)
            ),
        )
    ), **hu.gcs_cpu_only)
    def test_lengths_gather(self, inputs, gc, dc):
        """LengthsGather: concatenate the length-delimited segments selected
        by `indices` from the flat `items` tensor."""
        items = inputs[0]
        lengths = inputs[1]
        indices = inputs[2]

        def lengths_gather_op(items, lengths, indices):
            # ends[i] is the exclusive end offset of segment i.
            ends = np.cumsum(lengths)
            return [np.concatenate(
                list(items[ends[i] - lengths[i]:ends[i]] for i in indices))]

        op = core.CreateOperator(
            "LengthsGather",
            ["items", "lengths", "indices"],
            ["output"])

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[items, lengths, indices],
            reference=lengths_gather_op,
        )

    @given(**hu.gcs)
    def test_size_op(self, gc, dc):
        X = np.array([[1, 2], [3, 4]]).astype(np.float32)

        def size_op(tensor):
            return [np.prod(tensor.shape)]

        op = core.CreateOperator(
            "Size",
            ["X"],
            ["output"])

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[X],
            reference=size_op,
        )

    def test_alias_op(self):
        """ Don't use hypothesis because there are only 2 cases to check"""
        for size in [0, 5]:
            X = np.arange(size).astype(np.float32)
            workspace.FeedBlob('X', X)

            op = core.CreateOperator(
                "Alias",
                ["X"],
                ["Y"])
            workspace.RunOperatorOnce(op)
            Y = workspace.FetchBlob('Y')
            np.testing.assert_array_equal(X, Y)

    @given(**hu.gcs)
    def test_range(self, gc, dc):
        # Input-blob names for the 1-, 2-, and 3-argument forms of Range.
        names = [
            ('stop_', ),
            ('start_', 'stop_'),
            ('start_', 'stop_', 'step_'),
        ]
        # Most random values aren't great here, so use a fixed set instead of
        # hypothesis.
        for inputs in (
            (10, ),
            (np.float32(10.0), ),
            (0, ),
            (0, 0),
            (10., 5.0, -1.),
            (2, 10000),
            (2, 10000, 20000),
            (2, 10000, -1),
        ):
            inputs = [np.array(v) for v in inputs]
            op = core.CreateOperator(
                "Range",
                names[len(inputs) - 1],
                ["Y"]
            )

            self.assertReferenceChecks(
                device_option=gc,
                op=op,
                inputs=inputs,
                reference=lambda *x: [np.arange(*x)],
            )
            self.assertDeviceChecks(dc, op, inputs, [0])

        # A zero step must be rejected at run time.
        with self.assertRaisesRegexp(RuntimeError, 'Step size cannot be 0'):
            inputs = (np.array(0), np.array(10), np.array(0))
            op = core.CreateOperator(
                "Range",
                names[len(inputs) - 1],
                ["Y"]
            )
            self.assertReferenceChecks(
                device_option=gc,
                op=op,
                inputs=inputs,
                reference=lambda *x: [np.arange(*x)],
            )
class TestFillerOperator(hu.HypothesisTestCase):
    """Tests for filler operators (GaussianFill, ConstantFill,
    UniformIntFill, DiagonalFill, MSRAFill)."""

    @given(**hu.gcs)
    def test_shape_error(self, gc, dc):
        """A scalar `shape` argument is illegal; an empty shape list is a
        legal scalar fill."""
        op = core.CreateOperator(
            'GaussianFill',
            [],
            'out',
            shape=32,  # illegal parameter
            mean=0.0,
            std=1.0,
        )
        exception = False
        try:
            workspace.RunOperatorOnce(op)
        except Exception:
            exception = True
        self.assertTrue(exception, "Did not throw exception on illegal shape")

        op = core.CreateOperator(
            'ConstantFill',
            [],
            'out',
            shape=[],  # scalar
            value=2.0,
        )
        # NOTE(review): this flag is never read after being reset here.
        exception = False
        self.assertTrue(workspace.RunOperatorOnce(op))
        self.assertEqual(workspace.FetchBlob('out'), [2.0])

    @given(shape=hu.dims().flatmap(
        lambda dims: hu.arrays(
            [dims], dtype=np.int64,
            elements=st.integers(min_value=0, max_value=20))),
        a=st.integers(min_value=0, max_value=100),
        b=st.integers(min_value=0, max_value=100),
        **hu.gcs)
    def test_uniform_int_fill_op_blob_input(self, shape, a, b, gc, dc):
        """UniformIntFill with shape/min/max supplied as input blobs
        (input_as_shape=1). An empty range (b < a) yields a 0-row output."""
        net = core.Net('test_net')

        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
            shape_blob = net.Const(shape, dtype=np.int64)
            a_blob = net.Const(a, dtype=np.int32)
            b_blob = net.Const(b, dtype=np.int32)
            uniform_fill = net.UniformIntFill(
                [shape_blob, a_blob, b_blob], 1, input_as_shape=1)

        workspace.RunNetOnce(net)

        blob_out = workspace.FetchBlob(uniform_fill)
        if b < a:
            new_shape = shape[:]
            new_shape[0] = 0
            np.testing.assert_array_equal(new_shape, blob_out.shape)
        else:
            np.testing.assert_array_equal(shape, blob_out.shape)
            self.assertTrue((blob_out >= a).all())
            self.assertTrue((blob_out <= b).all())

    @given(shape=st.sampled_from([
        [3, 3],
        [5, 5, 5],
        [7, 7, 7, 7],
    ]), **hu.gcs)
    def test_diagonal_fill_op_float(self, shape, gc, dc):
        value = 2.5
        op = core.CreateOperator(
            'DiagonalFill',
            [],
            'out',
            shape=shape,  # scalar
            value=value,
        )

        for device_option in dc:
            op.device_option.CopyFrom(device_option)
            # Check against numpy reference
            self.assertReferenceChecks(gc, op, [shape, value], _fill_diagonal)

    @given(**hu.gcs)
    def test_diagonal_fill_op_int(self, gc, dc):
        value = 2
        shape = [3, 3]
        op = core.CreateOperator(
            'DiagonalFill',
            [],
            'out',
            shape=shape,
            dtype=core.DataType.INT32,
            value=value,
        )

        # Check against numpy reference
        self.assertReferenceChecks(gc, op, [shape, value], _fill_diagonal)

    @given(**hu.gcs)
    def test_gaussian_fill_op(self, gc, dc):
        op = core.CreateOperator(
            'GaussianFill',
            [],
            'out',
            shape=[17, 3, 3],  # sample odd dimensions
            mean=0.0,
            std=1.0,
        )

        for device_option in dc:
            op.device_option.CopyFrom(device_option)
            assert workspace.RunOperatorOnce(op), "GaussianFill op did not run " "successfully"
            blob_out = workspace.FetchBlob('out')
            # A draw of all-zeros from N(0, 1) is effectively impossible.
            assert np.count_nonzero(blob_out) > 0, "All generated elements are " "zeros. Is the random generator functioning correctly?"

    @given(**hu.gcs)
    def test_msra_fill_op(self, gc, dc):
        op = core.CreateOperator(
            'MSRAFill',
            [],
            'out',
            shape=[15, 5, 3],  # sample odd dimensions
        )
        for device_option in dc:
            op.device_option.CopyFrom(device_option)
            assert workspace.RunOperatorOnce(op), "MSRAFill op did not run " "successfully"
            blob_out = workspace.FetchBlob('out')
            # A draw of all-zeros is effectively impossible.
            assert np.count_nonzero(blob_out) > 0, "All generated elements are " "zeros. Is the random generator functioning correctly?"
class TestRegularizer(LayersTestCase):
    """Tests for LogBarrier and BoundedGradientProjection regularizers.

    NOTE(review): this is a DUPLICATE definition of ``TestRegularizer`` — it
    shadows the earlier, larger class of the same name in this module, so the
    earlier class's group-l1 / trimmed-norm / max-norm tests never run.
    Confirm whether the two should be merged or one removed.
    """

    @given(X=hu.arrays(dims=[2, 5],
                       elements=st.floats(min_value=-1.0, max_value=1.0)))
    def test_log_barrier(self, X):
        param = core.BlobReference("X")
        workspace.FeedBlob(param, X)
        train_init_net, train_net = self.get_training_nets()
        reg = regularizer.LogBarrier(1.0)
        # ON_LOSS produces the scalar regularization output; AFTER_OPTIMIZER
        # applies the in-place projection to the parameter blob.
        output = reg(train_net, train_init_net, param, by=RegularizationBy.ON_LOSS)
        reg(
            train_net,
            train_init_net,
            param,
            grad=None,
            by=RegularizationBy.AFTER_OPTIMIZER,
        )
        workspace.RunNetOnce(train_init_net)
        workspace.RunNetOnce(train_net)

        def ref(X):
            # Expected (loss, projected-param) pair; clipping at 1e-9 mirrors
            # the operator's guard against log(<=0).
            return (
                np.array(np.sum(-np.log(np.clip(X, 1e-9, None))) * 0.5).astype(
                    np.float32
                ),
                np.clip(X, 1e-9, None),
            )

        for x, y in zip(workspace.FetchBlobs([output, param]), ref(X)):
            npt.assert_allclose(x, y, rtol=1e-3)

    @given(
        X=hu.arrays(dims=[2, 5],
                    elements=st.floats(min_value=-1.0, max_value=1.0)),
        left_open=st.booleans(),
        right_open=st.booleans(),
        eps=st.floats(min_value=1e-6, max_value=1e-4),
        ub=st.floats(min_value=-1.0, max_value=1.0),
        lb=st.floats(min_value=-1.0, max_value=1.0),
        **hu.gcs_cpu_only
    )
    def test_bounded_grad_proj(self, X, left_open, right_open, eps, ub, lb, gc, dc):
        # Skip draws where the (possibly open) interval is empty.
        if ub - (eps if right_open else 0.) < lb + (eps if left_open else 0.):
            return
        param = core.BlobReference("X")
        workspace.FeedBlob(param, X)
        train_init_net, train_net = self.get_training_nets()
        reg = regularizer.BoundedGradientProjection(
            lb=lb, ub=ub, left_open=left_open, right_open=right_open, epsilon=eps
        )
        output = reg(train_net, train_init_net, param, by=RegularizationBy.ON_LOSS)
        reg(
            train_net,
            train_init_net,
            param,
            grad=None,
            by=RegularizationBy.AFTER_OPTIMIZER,
        )
        workspace.RunNetOnce(train_init_net)
        workspace.RunNetOnce(train_net)

        def ref(X):
            # Open ends are tightened by eps before clipping.
            return np.clip(
                X, lb + (eps if left_open else 0.), ub - (eps if right_open else 0.)
            )

        # Projection contributes no loss term.
        assert output is None
        npt.assert_allclose(workspace.blobs[param], ref(X), atol=1e-7)
class TestPairWiseLossOps(serial.SerializedTestCase):
    """Serialized tests for PairWiseLoss / PairWiseLossGradient.

    NOTE(review): this redefines ``TestPairWiseLossOps`` and therefore shadows
    the earlier class of the same name in this module — only this copy runs
    under the unittest loader. Confirm the earlier duplicate can be deleted.
    """

    @given(X=hu.arrays(dims=[2, 1],
                       elements=st.floats(min_value=0.0, max_value=10.0)),
           label=hu.arrays(dims=[2, 1],
                           elements=st.integers(min_value=0, max_value=1),
                           dtype=np.float32),
           **hu.gcs_cpu_only)
    def test_pair_wise_loss_predictions(self, X, label, gc, dc):
        """Check PairWiseLoss on a single 2-row pair against the closed form
        log(1 + exp(sign * (x1 - x0))) and verify row-order invariance."""
        workspace.FeedBlob('X', X)
        workspace.FeedBlob('label', label)
        # Same data with the two rows swapped; the loss must not change.
        new_label = np.array([label[1], label[0]])
        new_x = np.array([X[1], X[0]])
        workspace.FeedBlob('new_x', new_x)
        workspace.FeedBlob('new_label', new_label)
        net = core.Net('net')
        net.PairWiseLoss(['X', 'label'], ['output'])
        net.PairWiseLoss(['new_x', 'new_label'], ['new_output'])
        plan = core.Plan('predict_data')
        plan.AddStep(core.execution_step('predict_data', [net], num_iter=1))
        workspace.RunPlan(plan)
        output = workspace.FetchBlob('output')
        new_output = workspace.FetchBlob('new_output')
        sign = 1 if label[0] > label[1] else -1
        if label[0] == label[1]:
            # Equal labels form no ranking pair, so the loss is exactly zero.
            self.assertEqual(np.asscalar(output), 0)
            return

        self.assertAlmostEqual(
            np.asscalar(output),
            np.asscalar(np.log(1 + np.exp(sign * (X[1] - X[0])))),
            delta=1e-4)

        # check swapping row order doesn't alter overall loss
        self.assertAlmostEqual(output, new_output)

    @given(X=hu.arrays(dims=[2, 1],
                       elements=st.floats(min_value=0.0, max_value=10.0)),
           label=hu.arrays(dims=[2, 1],
                           elements=st.integers(min_value=0, max_value=1),
                           dtype=np.float32),
           dY=hu.arrays(dims=[1],
                        elements=st.floats(min_value=1, max_value=10)),
           **hu.gcs_cpu_only)
    def test_pair_wise_loss_gradient(self, X, label, dY, gc, dc):
        """Check PairWiseLossGradient's dX against the analytic closed form,
        its antisymmetry across the two rows, and a central finite
        difference of the forward operator."""
        workspace.FeedBlob('X', X)
        workspace.FeedBlob('dY', dY)
        workspace.FeedBlob('label', label)
        net = core.Net('net')
        net.PairWiseLossGradient(
            ['X', 'label', 'dY'],
            ['dX'],
        )
        plan = core.Plan('predict_data')
        plan.AddStep(core.execution_step('predict_data', [net], num_iter=1))
        workspace.RunPlan(plan)
        dx = workspace.FetchBlob('dX')
        sign = 1 if label[0] > label[1] else -1
        if label[0] == label[1]:
            # No ranking pair -> zero gradient.
            self.assertEqual(np.asscalar(dx[0]), 0)
            return
        # Analytic gradient: dL/dx0 = -dY * sign / (1 + exp(sign*(x0 - x1))).
        self.assertAlmostEqual(
            np.asscalar(dx[0]),
            np.asscalar(-dY[0] * sign / (1 + np.exp(sign * (X[0] - X[1])))),
            delta=1e-2 * abs(np.asscalar(dx[0])))
        # The gradient w.r.t. the two rows is antisymmetric.
        self.assertEqual(np.asscalar(dx[0]), np.asscalar(-dx[1]))

        delta = 1e-3
        up_x = np.array([[X[0] + delta], [X[1]]], dtype=np.float32)
        down_x = np.array([[X[0] - delta], [X[1]]], dtype=np.float32)
        workspace.FeedBlob('up_x', up_x)
        workspace.FeedBlob('down_x', down_x)
        new_net = core.Net('new_net')
        new_net.PairWiseLoss(['up_x', 'label'], ['up_output'])
        new_net.PairWiseLoss(['down_x', 'label'], ['down_output'])

        plan = core.Plan('predict_data')
        plan.AddStep(core.execution_step('predict_data', [new_net], num_iter=1))
        workspace.RunPlan(plan)

        down_output_pred = workspace.FetchBlob('down_output')
        up_output_pred = workspace.FetchBlob('up_output')
        # Central difference: dY * (f(x+d) - f(x-d)) / (2*d).
        np.testing.assert_allclose(
            np.asscalar(dx[0]),
            np.asscalar(0.5 * dY[0] *
                        (up_output_pred[0] - down_output_pred[0]) / delta),
            rtol=1e-2, atol=1e-2)

    @serial.given(n=st.integers(0, 10), k=st.integers(1, 5), **hu.gcs_cpu_only)
    def test_pair_wise_loss_batch(self, n, k, gc, dc):
        """Batched PairWiseLoss: per-segment averaged pairwise logistic loss,
        segments delimited by `lengths`."""
        lengths = np.random.randint(k, size=n).astype(np.int32) + 1
        X = np.random.rand(sum(lengths)).astype(np.float32)
        label = np.random.randint(k, size=sum(lengths)).astype(np.float32)

        def pair_wise_op(X, label, lengths):
            N = lengths.size
            output = np.zeros(N).astype(np.float32)

            def f(x):
                return np.log(1 + np.exp(x))

            offset = 0
            for idx in range(N):
                # Advance to the start of segment idx.
                offset += lengths[idx - 1] if idx > 0 else 0
                count = 0
                for i in range(offset, offset + lengths[idx]):
                    for j in range(offset, i):
                        if label[i] == label[j]:
                            # Ties contribute no pair.
                            continue
                        sign = 1 if label[i] > label[j] else -1
                        output[idx] += f(sign * (X[j] - X[i]))
                        count += 1
                if count > 0:
                    # Average over the segment's valid pairs.
                    output[idx] /= count
            return [output]

        op = core.CreateOperator('PairWiseLoss', ['X', 'label', 'lengths'], 'out')

        # Check against numpy reference
        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[X, label, lengths],
            reference=pair_wise_op,
        )
        # Check over multiple devices
        self.assertDeviceChecks(dc, op, [X, label, lengths], [0])
        # Gradient check
        self.assertGradientChecks(gc, op, [X, label, lengths], 0, [0])
class TestCrossEntropyOps(hu.HypothesisTestCase):
    """Tests for sigmoid cross-entropy (plain, log-D-trick, unjoined),
    weighted sigmoid cross-entropy, and soft-label CrossEntropy operators.

    Fix: the gradient reference callbacks previously read the enclosing
    test's ``logits`` / ``targets`` / ``weights`` closure variables instead
    of the forward inputs handed to them via ``fwd_inputs``. The two happen
    to be the same arrays today, but the ``grad_reference`` contract is to
    use the recorded forward inputs; the callbacks now do so consistently.
    """

    @given(
        inputs=st.lists(
            elements=st.integers(min_value=1, max_value=5),
            min_size=1,
            max_size=2,
            average_size=2,
        ).flatmap(
            lambda shape: st.tuples(
                hu.arrays(
                    dims=shape,
                    # Keep logits away from 0 for numerical stability.
                    elements=st.one_of(
                        st.floats(min_value=-1.0, max_value=-0.1),
                        st.floats(min_value=0.1, max_value=1.0),
                    )),
                hu.arrays(
                    dims=shape,
                    elements=st.sampled_from([0.0, 1.0]),
                ),
            )
        ),
        # (log_D_trick, unjoined_lr_loss) — never both True.
        options=st.one_of(
            st.tuples(st.just(True), st.just(False)),
            st.tuples(st.just(False), st.just(True)),
            st.tuples(st.just(False), st.just(False))
        ),
        **hu.gcs
    )
    def test_sigmoid_cross_entropy_with_logits(self, inputs, options, gc, dc):
        """Forward and gradient reference checks for
        SigmoidCrossEntropyWithLogits under all three loss variants."""
        logits, targets = inputs
        log_D_trick, unjoined_lr_loss = options

        def sigmoid_xentr_logit_ref(logits, targets):
            if unjoined_lr_loss:
                s = unjoined_sigmoid_cross_entropy(logits, targets)
            else:
                s = (
                    sigmoid_cross_entropy_with_logits(logits, targets)
                    if not log_D_trick
                    else sigmoid_cross_entropy_with_logits_with_log_D_trick(
                        logits, targets
                    )
                )
            # The op averages over the innermost dimension.
            m = np.mean(s, axis=len(logits.shape) - 1)
            return (m, )

        def sigmoid_xentr_logit_grad_ref(g_out, outputs, fwd_inputs):
            fwd_logits, fwd_targets = fwd_inputs
            inner_size = fwd_logits.shape[-1]
            if unjoined_lr_loss:
                # Use the recorded forward inputs, not the test's closure
                # variables (previously a latent bug).
                m = unjoined_sigmoid_cross_entropy_grad(fwd_logits, fwd_targets)
            else:
                m = (
                    sigmoid_cross_entropy_with_logits_grad(
                        fwd_logits, fwd_targets)
                    if not log_D_trick
                    else sigmoid_cross_entropy_with_logits_with_log_D_trick_grad(
                        fwd_logits, fwd_targets
                    )
                )
            # m = fwd_targets - sigmoid(fwd_logits)
            g_in = -np.expand_dims(g_out, axis=-1) * m / inner_size
            return (g_in, None)

        op = core.CreateOperator(
            'SigmoidCrossEntropyWithLogits',
            ['logits', 'targets'],
            ['xentropy'],
            log_D_trick=log_D_trick,
            unjoined_lr_loss=unjoined_lr_loss
        )
        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[logits, targets],
            reference=sigmoid_xentr_logit_ref,
            output_to_grad='xentropy',
            grad_reference=sigmoid_xentr_logit_grad_ref)

    @given(
        log_D_trick=st.just(False),
        **hu.gcs_cpu_only
    )
    def test_cross_entropy_and_unjoied_cross_entropy_relation(
        self, log_D_trick, gc, dc
    ):
        """On a fixed dataset, the (size-scaled) joined LR loss should match
        the unjoined LR loss computed on the equivalent unjoined dataset."""
        logits = np.array([1.4720, 0.3500, -0.6529, -1.1908, 0.8357, -1.0774,
                           -0.3395, -0.2469, 0.6708, -1.8332], dtype='f')
        targets = np.array([1., 1., 1., 1., 1., 1., 0., 0., 0., 0.], dtype='f')
        lr_size = targets.size
        unjoined_lr_loss = False

        def sigmoid_xentr_logit_ref(logits, targets):
            if unjoined_lr_loss:
                s = unjoined_sigmoid_cross_entropy(logits, targets)
            else:
                s = sigmoid_cross_entropy_with_logits(logits, targets)
            m = np.mean(s, axis=len(logits.shape) - 1)
            return (m, )

        def sigmoid_xentr_logit_grad_ref(g_out, outputs, fwd_inputs):
            fwd_logits, fwd_targets = fwd_inputs
            inner_size = fwd_logits.shape[-1]
            if unjoined_lr_loss:
                # Use the recorded forward inputs, not the test's closure
                # variables (previously a latent bug).
                m = unjoined_sigmoid_cross_entropy_grad(fwd_logits, fwd_targets)
            else:
                m = sigmoid_cross_entropy_with_logits_grad(
                    fwd_logits, fwd_targets)

            # m = fwd_targets - sigmoid(fwd_logits)
            g_in = -np.expand_dims(g_out, axis=-1) * m / inner_size
            return (g_in, None)

        op = core.CreateOperator(
            'SigmoidCrossEntropyWithLogits',
            ['logits', 'targets'],
            ['xentropy'],
            log_D_trick=log_D_trick,
            unjoined_lr_loss=unjoined_lr_loss
        )
        output_lr = self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[logits, targets],
            reference=sigmoid_xentr_logit_ref,
            output_to_grad='xentropy',
            grad_reference=sigmoid_xentr_logit_grad_ref)

        # Unjoined dataset where labels change later
        logits = np.array([1.4720, 0.3500, -0.6529, -1.1908, 0.8357, -1.0774,
                           -0.3395, -0.2469, 0.6708, -1.8332, 1.4720, 0.3500,
                           -0.6529, -1.1908, 0.8357, -1.0774], dtype='f')
        targets = np.array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1.,
                            1., 1., 1., 1.], dtype='f')
        unjoined_lr_loss = True
        unjoined_lr_size = targets.size

        op = core.CreateOperator(
            'SigmoidCrossEntropyWithLogits',
            ['logits', 'targets'],
            ['xentropy'],
            log_D_trick=log_D_trick,
            unjoined_lr_loss=unjoined_lr_loss
        )
        outputs_unjoined_lr = self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[logits, targets],
            reference=sigmoid_xentr_logit_ref,
            output_to_grad='xentropy',
            grad_reference=sigmoid_xentr_logit_grad_ref)

        self.assertAlmostEqual(
            output_lr[0].item(0) * lr_size / unjoined_lr_size,
            outputs_unjoined_lr[0].item(0),
            delta=0.0001)

    @given(
        inputs=st.lists(
            elements=st.integers(min_value=1, max_value=5),
            min_size=1,
            max_size=2,
            average_size=2,
        ).flatmap(
            lambda shape: st.tuples(
                hu.arrays(
                    dims=shape,
                    elements=st.one_of(
                        st.floats(min_value=-1.0, max_value=-0.1),
                        st.floats(min_value=0.1, max_value=1.0),
                    )),
                hu.arrays(
                    dims=shape,
                    elements=st.sampled_from([0.0, 1.0]),
                ),
                hu.arrays(
                    dims=shape,
                    elements=st.floats(min_value=0.1, max_value=1.0),
                ),
            )
        ),
        **hu.gcs
    )
    def test_weighted_sigmoid_cross_entropy_with_logits(self, inputs, gc, dc):
        """Forward and gradient reference checks for
        WeightedSigmoidCrossEntropyWithLogits."""
        logits, targets, weights = inputs

        def weighted_sigmoid_xentr_logit_ref(logits, targets, weights):
            s = sigmoid_cross_entropy_with_logits(logits, targets)
            s = np.multiply(s, weights)
            m = np.mean(s, axis=len(logits.shape) - 1)
            return (m, )

        def weighted_sigmoid_xentr_logit_grad_ref(g_out, outputs, fwd_inputs):
            fwd_logits, fwd_targets, fwd_weights = fwd_inputs
            inner_size = fwd_logits.shape[-1]
            m = fwd_targets - sigmoid(fwd_logits)
            # Use fwd_weights from fwd_inputs, not the closure variable
            # (previously a latent bug).
            m = np.multiply(m, fwd_weights)
            g_in = -np.expand_dims(g_out, axis=-1) * m / inner_size
            return (g_in, None, None)

        op = core.CreateOperator(
            'WeightedSigmoidCrossEntropyWithLogits',
            ['logits', 'targets', 'weights'],
            ['xentropy'])
        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[logits, targets, weights],
            reference=weighted_sigmoid_xentr_logit_ref,
            output_to_grad='xentropy',
            grad_reference=weighted_sigmoid_xentr_logit_grad_ref)

    @given(n=st.integers(2, 10),
           b=st.integers(1, 5),
           **hu.gcs_cpu_only)
    def test_soft_label_cross_entropy(self, n, b, gc, dc):
        """CrossEntropy with soft (probability-distribution) labels."""
        # Initialize X and add 1e-2 for numerical stability
        X = np.random.rand(b, n).astype(np.float32)
        X = X + 1e-2
        for i in range(b):
            X[i] = X[i] / np.sum(X[i])

        # Initialize label
        label = np.random.rand(b, n).astype(np.float32)
        for i in range(b):
            label[i] = label[i] / np.sum(label[i])

        # Reference implementation of cross entropy with soft labels
        def soft_label_xentr_ref(X, label):
            # max(..., 1e-20) guards log(0).
            xent = [np.sum((-label[j][i] * np.log(max(X[j][i], 1e-20))
                            for i in range(len(X[0])))) for j in range(b)]
            return (xent,)

        op = core.CreateOperator("CrossEntropy", ["X", "label"], ["Y"])

        # TODO(surya) Once CrossEntropyOp is ported to GPU, add the respective
        # tests to this unit test.
        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[X, label],
            reference=soft_label_xentr_ref,
        )

        self.assertGradientChecks(
            gc, op, [X, label], 0, [0], stepsize=1e-4, threshold=1e-2)
class TestLayers(LayersTestCase):
    """Tests for model-layer helpers: verifies the exact operators each layer
    emits into the train/eval/predict nets and, where cheap, runs the nets and
    checks numerical outputs against numpy."""

    def testFCWithoutBias(self):
        """FCWithoutBias must emit one UniformFill init and a single MatMul in
        both train and predict nets."""
        output_dims = 2
        fc_without_bias = self.model.FCWithoutBias(
            self.model.input_feature_schema.float_features, output_dims)
        self.model.output_schema = fc_without_bias
        self.assertEqual(schema.Scalar((np.float32, (output_dims, ))),
                         fc_without_bias)
        train_init_net, train_net = self.get_training_nets()
        init_ops = self.assertNetContainOps(train_init_net, [
            OpSpec("UniformFill", None, None),
        ])
        mat_mul_spec = OpSpec("MatMul", [
            self.model.input_feature_schema.float_features(),
            init_ops[0].output[0],
        ], fc_without_bias.field_blobs())
        self.assertNetContainOps(train_net, [mat_mul_spec])
        predict_net = self.get_predict_net()
        self.assertNetContainOps(predict_net, [mat_mul_spec])

    def testSamplingTrain(self):
        """SamplingTrain("FC"): train net gathers sampled W/b rows, runs FC on
        them and subtracts log(sampling_prob); predict net uses the full FC."""
        output_dims = 1000
        indices = self.new_record(schema.Scalar((np.int32, (10, ))))
        sampling_prob = self.new_record(schema.Scalar((np.float32, (10, ))))
        sampled_fc = self.model.SamplingTrain(
            schema.Struct(
                ('input', self.model.input_feature_schema.float_features),
                ('indices', indices),
                ('sampling_prob', sampling_prob),
            ),
            "FC",
            output_dims,
        )
        self.model.output_schema = sampled_fc
        # Check that we don't add prediction layer into the model
        self.assertEqual(1, len(self.model.layers))
        self.assertEqual(schema.Scalar((np.float32, (output_dims, ))),
                         sampled_fc)
        train_init_net, train_net = self.get_training_nets()
        init_ops = self.assertNetContainOps(train_init_net, [
            OpSpec("UniformFill", None, None),
            OpSpec("UniformFill", None, None),
        ])
        sampled_fc_layer = self.model.layers[0]
        gather_w_spec = OpSpec("Gather", [
            init_ops[0].output[0],
            indices(),
        ], [sampled_fc_layer._prediction_layer.train_param_blobs[0]])
        gather_b_spec = OpSpec("Gather", [
            init_ops[1].output[0],
            indices(),
        ], [sampled_fc_layer._prediction_layer.train_param_blobs[1]])
        train_fc_spec = OpSpec("FC", [
            self.model.input_feature_schema.float_features(),
        ] + sampled_fc_layer._prediction_layer.train_param_blobs,
            sampled_fc.field_blobs())
        log_spec = OpSpec("Log", [sampling_prob()], [None])
        sub_spec = OpSpec("Sub",
                          [sampled_fc.field_blobs()[0], None],
                          sampled_fc.field_blobs())
        train_ops = self.assertNetContainOps(
            train_net,
            [gather_w_spec, gather_b_spec, train_fc_spec, log_spec, sub_spec])
        # Log's output must feed Sub's second input (the log-prob correction).
        self.assertEqual(train_ops[3].output[0], train_ops[4].input[1])
        predict_net = self.get_predict_net()
        self.assertNetContainOps(predict_net, [
            OpSpec("FC", [
                self.model.input_feature_schema.float_features(),
                init_ops[0].output[0],
                init_ops[1].output[0],
            ], sampled_fc.field_blobs())
        ])

    def testBatchLRLoss(self):
        """BatchLRLoss returns a scalar float32 loss."""
        input_record = self.new_record(
            schema.Struct(('label', schema.Scalar((np.float64, (1, )))),
                          ('prediction', schema.Scalar((np.float32, (2, )))),
                          ('weight', schema.Scalar((np.float64, (1, ))))))
        loss = self.model.BatchLRLoss(input_record)
        self.assertEqual(schema.Scalar((np.float32, tuple())), loss)

    def testBatchMSELoss(self):
        """BatchMSELoss returns a scalar float32 loss."""
        input_record = self.new_record(
            schema.Struct(
                ('label', schema.Scalar((np.float64, (1, )))),
                ('prediction', schema.Scalar((np.float32, (2, )))),
            ))
        loss = self.model.BatchMSELoss(input_record)
        self.assertEqual(schema.Scalar((np.float32, tuple())), loss)

    def testBatchSigmoidCrossEntropyLoss(self):
        """BatchSigmoidCrossEntropyLoss returns a scalar float32 loss."""
        input_record = self.new_record(
            schema.Struct(('label', schema.Scalar((np.float32, (32, )))),
                          ('prediction',
                           schema.Scalar((np.float32, (32, ))))))
        loss = self.model.BatchSigmoidCrossEntropyLoss(input_record)
        self.assertEqual(schema.Scalar((np.float32, tuple())), loss)

    def testBatchSoftmaxLoss(self):
        """BatchSoftmaxLoss returns a (softmax, loss) struct."""
        input_record = self.new_record(
            schema.Struct(('label', schema.Scalar((np.float32, tuple()))),
                          ('prediction',
                           schema.Scalar((np.float32, (32, ))))))
        loss = self.model.BatchSoftmaxLoss(input_record)
        self.assertEqual(
            schema.Struct(
                ('softmax', schema.Scalar((np.float32, (32, )))),
                ('loss', schema.Scalar(np.float32)),
            ), loss)

    @given(
        X=hu.arrays(dims=[2, 5]),
    )
    def testBatchNormalization(self, X):
        """BatchNormalization wraps SpatialBN in ExpandDims/Squeeze; train net
        uses is_test=0 (5 outputs), eval/predict use is_test=1 (1 output)."""
        input_record = self.new_record(schema.Scalar((np.float32, (5, ))))
        schema.FeedRecord(input_record, [X])
        bn_output = self.model.BatchNormalization(input_record)
        self.assertEqual(schema.Scalar((np.float32, (5, ))), bn_output)
        self.model.output_schema = schema.Struct()
        train_init_net, train_net = self.get_training_nets()
        # Four fills: scale, bias, running mean, running variance.
        init_ops = self.assertNetContainOps(train_init_net, [
            OpSpec("ConstantFill", None, None),
            OpSpec("ConstantFill", None, None),
            OpSpec("ConstantFill", None, None),
            OpSpec("ConstantFill", None, None),
        ])
        input_blob = input_record.field_blobs()[0]
        output_blob = bn_output.field_blobs()[0]
        expand_dims_spec = OpSpec(
            "ExpandDims",
            [input_blob],
            [input_blob],
        )
        train_bn_spec = OpSpec(
            "SpatialBN",
            [input_blob, init_ops[0].output[0], init_ops[1].output[0],
             init_ops[2].output[0], init_ops[3].output[0]],
            [output_blob, init_ops[2].output[0], init_ops[3].output[0],
             None, None],
            {'is_test': 0, 'order': 'NCHW', 'momentum': 0.9},
        )
        test_bn_spec = OpSpec(
            "SpatialBN",
            [input_blob, init_ops[0].output[0], init_ops[1].output[0],
             init_ops[2].output[0], init_ops[3].output[0]],
            [output_blob],
            {'is_test': 1, 'order': 'NCHW', 'momentum': 0.9},
        )
        squeeze_spec = OpSpec(
            "Squeeze",
            [output_blob],
            [output_blob],
        )
        self.assertNetContainOps(
            train_net, [expand_dims_spec, train_bn_spec, squeeze_spec])
        eval_net = self.get_eval_net()
        self.assertNetContainOps(
            eval_net, [expand_dims_spec, test_bn_spec, squeeze_spec])
        predict_net = self.get_predict_net()
        self.assertNetContainOps(
            predict_net, [expand_dims_spec, test_bn_spec, squeeze_spec])
        # Smoke-run all three nets (re-feed because the nets mutate blobs
        # in place via the ExpandDims/Squeeze aliasing above).
        workspace.RunNetOnce(train_init_net)
        workspace.RunNetOnce(train_net)
        schema.FeedRecord(input_record, [X])
        workspace.RunNetOnce(eval_net)
        schema.FeedRecord(input_record, [X])
        workspace.RunNetOnce(predict_net)

    @given(
        X=hu.arrays(dims=[5, 2]),
        num_to_collect=st.integers(min_value=1, max_value=10),
    )
    def testLastNWindowCollector(self, X, num_to_collect):
        """LastNWindowCollector keeps the trailing num_to_collect rows of the
        5-row input."""
        input_record = self.new_record(schema.Scalar(np.float32))
        schema.FeedRecord(input_record, [X])
        last_n = self.model.LastNWindowCollector(input_record, num_to_collect)
        self.run_train_net_forward_only()
        output_record = schema.FetchRecord(last_n)
        # 5 == number of input rows (dims=[5, 2] above).
        start = max(0, 5 - num_to_collect)
        npt.assert_array_equal(X[start:], output_record())

    def testUniformSampling(self):
        """UniformSampling prepends the positive ids and pads with uniform
        samples; every sample carries probability num_samples/num_elements."""
        input_record = self.new_record(schema.Scalar(np.int32))
        input_array = np.array([3, 10, 11, 15, 20, 99], dtype=np.int32)
        schema.FeedRecord(input_record, [input_array])
        num_samples = 20
        num_elements = 100
        uniform_sampling_output = self.model.UniformSampling(
            input_record, num_samples, num_elements)
        self.model.loss = uniform_sampling_output
        self.run_train_net()
        samples = workspace.FetchBlob(uniform_sampling_output.samples())
        sampling_prob = workspace.FetchBlob(
            uniform_sampling_output.sampling_prob())
        self.assertEqual(num_samples, len(samples))
        np.testing.assert_array_equal(input_array,
                                      samples[:len(input_array)])
        np.testing.assert_almost_equal(
            np.array([float(num_samples) / num_elements] * num_samples,
                     dtype=np.float32), sampling_prob)

    def testGatherRecord(self):
        """GatherRecord gathers a nested record (dense, list, list-of-list) by
        row indices; each field is checked against a numpy reconstruction."""
        indices = np.array([1, 3, 4], dtype=np.int32)
        dense = np.array(range(20), dtype=np.float32).reshape(10, 2)
        lengths = np.array(range(10), dtype=np.int32)
        items = np.array(range(lengths.sum()), dtype=np.int64)
        items_lengths = np.array(range(lengths.sum()), dtype=np.int32)
        items_items = np.array(range(items_lengths.sum()), dtype=np.int64)
        record = self.new_record(
            schema.Struct(
                ('dense', schema.Scalar(np.float32)),
                ('sparse', schema.Struct(
                    ('list', schema.List(np.int64)),
                    ('list_of_list', schema.List(schema.List(np.int64))),
                )),
                ('empty_struct', schema.Struct())))
        indices_record = self.new_record(schema.Scalar(np.int32))
        input_record = schema.Struct(
            ('indices', indices_record),
            ('record', record),
        )
        schema.FeedRecord(input_record, [
            indices, dense, lengths, items, lengths, items_lengths,
            items_items
        ])
        gathered_record = self.model.GatherRecord(input_record)
        self.assertTrue(schema.equal_schemas(gathered_record, record))
        self.run_train_net_forward_only()
        gathered_dense = workspace.FetchBlob(gathered_record.dense())
        np.testing.assert_array_equal(
            np.concatenate([dense[i:i + 1] for i in indices]),
            gathered_dense)
        gathered_lengths = workspace.FetchBlob(
            gathered_record.sparse.list.lengths())
        np.testing.assert_array_equal(
            np.concatenate([lengths[i:i + 1] for i in indices]),
            gathered_lengths)
        gathered_items = workspace.FetchBlob(
            gathered_record.sparse.list.items())
        # Start offset of each row's item segment.
        offsets = lengths.cumsum() - lengths
        np.testing.assert_array_equal(
            np.concatenate(
                [items[offsets[i]:offsets[i] + lengths[i]]
                 for i in indices]), gathered_items)
        gathered_items_lengths = workspace.FetchBlob(
            gathered_record.sparse.list_of_list.items.lengths())
        np.testing.assert_array_equal(
            np.concatenate([
                items_lengths[offsets[i]:offsets[i] + lengths[i]]
                for i in indices
            ]), gathered_items_lengths)
        # Recompute offsets/lengths of the innermost items per outer row.
        nested_offsets = []
        nested_lengths = []
        nested_offset = 0
        j = 0
        for l in lengths:
            nested_offsets.append(nested_offset)
            nested_length = 0
            for _i in range(l):
                nested_offset += items_lengths[j]
                nested_length += items_lengths[j]
                j += 1
            nested_lengths.append(nested_length)
        gathered_items_items = workspace.FetchBlob(
            gathered_record.sparse.list_of_list.items.items())
        np.testing.assert_array_equal(
            np.concatenate([
                items_items[nested_offsets[i]:nested_offsets[i] +
                            nested_lengths[i]] for i in indices
            ]), gathered_items_items)

    def testMapToRange(self):
        """MapToRange assigns each new id the next dense index during
        training; eval/predict only look up existing ids (unknown -> 0)."""
        input_record = self.new_record(schema.Scalar(np.int32))
        map_to_range_output = self.model.MapToRange(input_record,
                                                    max_index=100)
        self.model.output_schema = schema.Struct()
        train_init_net, train_net = self.get_training_nets()
        schema.FeedRecord(
            input_record,
            [np.array([10, 3, 20, 99, 15, 11, 3, 11], dtype=np.int32)])
        workspace.RunNetOnce(train_init_net)
        workspace.RunNetOnce(train_net)
        indices = workspace.FetchBlob(map_to_range_output())
        np.testing.assert_array_equal(
            np.array([1, 2, 3, 4, 5, 6, 2, 6], dtype=np.int32), indices)
        schema.FeedRecord(
            input_record,
            [np.array([10, 3, 23, 35, 60, 15, 10, 15], dtype=np.int32)])
        workspace.RunNetOnce(train_net)
        indices = workspace.FetchBlob(map_to_range_output())
        np.testing.assert_array_equal(
            np.array([1, 2, 7, 8, 9, 5, 1, 5], dtype=np.int32), indices)
        eval_net = self.get_eval_net()
        schema.FeedRecord(
            input_record,
            [np.array([10, 3, 23, 35, 60, 15, 200], dtype=np.int32)])
        workspace.RunNetOnce(eval_net)
        indices = workspace.FetchBlob(map_to_range_output())
        # 200 was never seen in training, so it maps to 0.
        np.testing.assert_array_equal(
            np.array([1, 2, 7, 8, 9, 5, 0], dtype=np.int32), indices)
        schema.FeedRecord(
            input_record,
            [np.array([10, 3, 23, 15, 101, 115], dtype=np.int32)])
        workspace.RunNetOnce(eval_net)
        indices = workspace.FetchBlob(map_to_range_output())
        np.testing.assert_array_equal(
            np.array([1, 2, 7, 5, 0, 0], dtype=np.int32), indices)
        predict_net = self.get_predict_net()
        schema.FeedRecord(
            input_record,
            [np.array([3, 3, 20, 23, 151, 35, 60, 15, 200],
                      dtype=np.int32)])
        workspace.RunNetOnce(predict_net)
        indices = workspace.FetchBlob(map_to_range_output())
        np.testing.assert_array_equal(
            np.array([2, 2, 3, 7, 0, 8, 9, 5, 0], dtype=np.int32), indices)

    def testSelectRecordByContext(self):
        """SelectRecordByContext picks the raw features at prediction time and
        the log-transformed features in training/eval."""
        float_features = self.model.input_feature_schema.float_features
        float_array = np.array([1.0, 2.0], dtype=np.float32)
        schema.FeedRecord(float_features, [float_array])
        with Tags(Tags.EXCLUDE_FROM_PREDICTION):
            log_float_features, = self.model.Log(float_features, 1)
        joined = self.model.SelectRecordByContext(
            schema.Struct(
                (InstantiationContext.PREDICTION, float_features),
                (InstantiationContext.TRAINING, log_float_features),
                # TODO: TRAIN_ONLY layers are also generated in eval
                (InstantiationContext.EVAL, log_float_features),
            ))
        # model.output_schema has to be a struct
        self.model.output_schema = schema.Struct(('joined', joined))
        predict_net = layer_model_instantiator.generate_predict_net(
            self.model)
        workspace.RunNetOnce(predict_net)
        predict_output = schema.FetchRecord(predict_net.output_record())
        npt.assert_array_equal(float_array, predict_output['joined']())
        eval_net = layer_model_instantiator.generate_eval_net(self.model)
        workspace.RunNetOnce(eval_net)
        eval_output = schema.FetchRecord(eval_net.output_record())
        npt.assert_array_equal(np.log(float_array), eval_output['joined']())
        _, train_net = (
            layer_model_instantiator.generate_training_nets_forward_only(
                self.model))
        workspace.RunNetOnce(train_net)
        train_output = schema.FetchRecord(train_net.output_record())
        npt.assert_array_equal(np.log(float_array), train_output['joined']())

    def testFunctionalLayer(self):
        """model.Functional wraps a user net-building callback as a layer."""
        def normalize(net, in_record, out_record):
            mean = net.ReduceFrontMean(in_record(), 1)
            net.Sub([in_record(), mean], out_record[0](), broadcast=1)
        normalized = self.model.Functional(
            self.model.input_feature_schema.float_features, 1,
            normalize, name="normalizer")
        # Attach metadata to one of the outputs and use it in FC
        normalized[0].set_type((np.float32, 32))
        self.model.output_schema = self.model.FC(normalized[0], 2)
        predict_net = layer_model_instantiator.generate_predict_net(
            self.model)
        ops = predict_net.Proto().op
        assert len(ops) == 3
        assert ops[0].type == "ReduceFrontMean"
        assert ops[1].type == "Sub"
        assert ops[2].type == "FC"
        assert len(ops[0].input) == 1
        assert ops[0].input[0] ==\
            self.model.input_feature_schema.float_features()
        assert len(ops[1].output) == 1
        assert ops[1].output[0] in ops[2].input

    def testFunctionalLayerHelper(self):
        """Same pipeline as testFunctionalLayer but via the auto-generated
        per-operator helpers (model.ReduceFrontMean / model.Sub)."""
        mean = self.model.ReduceFrontMean(
            self.model.input_feature_schema.float_features, 1)
        normalized = self.model.Sub(
            schema.Tuple(
                self.model.input_feature_schema.float_features, mean[0]),
            1, broadcast=1)
        # Attach metadata to one of the outputs and use it in FC
        normalized[0].set_type((np.float32, (32, )))
        self.model.output_schema = self.model.FC(normalized[0], 2)
        predict_net = layer_model_instantiator.generate_predict_net(
            self.model)
        ops = predict_net.Proto().op
        assert len(ops) == 3
        assert ops[0].type == "ReduceFrontMean"
        assert ops[1].type == "Sub"
        assert ops[2].type == "FC"
        assert len(ops[0].input) == 1
        assert ops[0].input[0] ==\
            self.model.input_feature_schema.float_features()
        assert len(ops[1].output) == 1
        assert ops[1].output[0] in ops[2].input

    def testFunctionalLayerHelperAutoInference(self):
        """Output type/shape of a functional helper is inferred from the
        input schema (float32, (32,))."""
        softsign = self.model.Softsign(
            schema.Tuple(self.model.input_feature_schema.float_features),
            1)
        assert len(softsign.field_types()) == 1
        assert softsign.field_types()[0].base == np.float32
        assert softsign.field_types()[0].shape == (32, )
        self.model.output_schema = self.model.FC(softsign[0], 2)
        predict_net = layer_model_instantiator.generate_predict_net(
            self.model)
        ops = predict_net.Proto().op
        assert len(ops) == 2
        assert ops[0].type == "Softsign"
        assert ops[1].type == "FC"
        assert len(ops[0].input) == 1
        assert ops[0].input[0] ==\
            self.model.input_feature_schema.float_features()
        assert len(ops[0].output) == 1
        assert ops[0].output[0] in ops[1].input

    def testFunctionalLayerHelperAutoInferenceScalar(self):
        """Reductions infer a scalar (shape ()) float32 output."""
        loss = self.model.AveragedLoss(self.model.input_feature_schema, 1)
        self.assertEqual(1, len(loss.field_types()))
        self.assertEqual(np.float32, loss.field_types()[0].base)
        self.assertEqual(tuple(), loss.field_types()[0].shape)

    def testFunctionalLayerInputCoercion(self):
        """Raw blob references (global constants) are coerced to records."""
        one = self.model.global_constants['ONE']
        two = self.model.Add([one, one], 1)
        self.model.loss = two
        self.run_train_net()
        data = workspace.FetchBlob(two.field_blobs()[0])
        np.testing.assert_array_equal([2.0], data)

    def testFunctionalLayerWithOutputNames(self):
        """Explicit output_names_or_num controls both field names and the
        generated blob names."""
        k = 3
        topk = self.model.TopK(
            self.model.input_feature_schema,
            output_names_or_num=['values', 'indices'],
            k=k,
        )
        self.assertEqual(2, len(topk.field_types()))
        self.assertEqual(np.float32, topk.field_types()[0].base)
        self.assertEqual((k, ), topk.field_types()[0].shape)
        self.assertEqual(np.int32, topk.field_types()[1].base)
        self.assertEqual((k, ), topk.field_types()[1].shape)
        self.assertEqual(['TopK/values', 'TopK/indices'],
                         topk.field_blobs())

    def testFunctionalLayerWithOutputDtypes(self):
        """Explicit output_dtypes overrides the inferred output schema."""
        loss = self.model.AveragedLoss(
            self.model.input_feature_schema,
            1,
            output_dtypes=(np.float32, (1, )),
        )
        self.assertEqual(1, len(loss.field_types()))
        self.assertEqual(np.float32, loss.field_types()[0].base)
        self.assertEqual((1, ), loss.field_types()[0].shape)

    def testPropagateRequestOnly(self):
        """Concat output is request-only iff every input is request-only."""
        # test case when output is request only
        input_record = self.new_record(
            schema.Struct(
                ('input1', schema.Scalar((np.float32, (32, )))),
                ('input2', schema.Scalar((np.float32, (64, )))),
                ('input3', schema.Scalar((np.float32, (16, )))),
            ))
        set_request_only(input_record)
        concat_output = self.model.Concat(input_record)
        self.assertEqual(is_request_only_scalar(concat_output), True)
        # test case when output is not request only
        input_record2 = self.new_record(
            schema.Struct(('input4', schema.Scalar(
                (np.float32, (100, )))))) + input_record
        concat_output2 = self.model.Concat(input_record2)
        self.assertEqual(is_request_only_scalar(concat_output2), False)

    def testSetRequestOnly(self):
        """set_request_only must preserve pre-existing scalar metadata."""
        input_record = schema.Scalar(np.int64)
        schema.attach_metadata_to_scalars(
            input_record,
            schema.Metadata(
                categorical_limit=100000000,
                expected_value=99,
                feature_specs=schema.FeatureSpec(
                    feature_ids=[1, 100, 1001])))
        set_request_only(input_record)
        self.assertEqual(input_record.metadata.categorical_limit,
                         100000000)
        self.assertEqual(input_record.metadata.expected_value, 99)
        self.assertEqual(input_record.metadata.feature_specs.feature_ids,
                         [1, 100, 1001])
class TestFillerOperator(serial.SerializedTestCase):
    """Tests for the *Fill operators: shape validation, int64-sized shapes,
    shape-from-blob input, diagonal/range/gaussian/MSRA/fp16-uniform fills."""

    @given(**hu.gcs)
    @settings(deadline=10000)
    def test_shape_error(self, gc, dc):
        """A non-list `shape` argument must make the fill op fail; an empty
        shape list is a legal scalar fill."""
        op = core.CreateOperator(
            'GaussianFill',
            [],
            'out',
            shape=32,  # illegal parameter
            mean=0.0,
            std=1.0,
        )
        exception = False
        try:
            workspace.RunOperatorOnce(op)
        except Exception:
            exception = True
        self.assertTrue(exception,
                        "Did not throw exception on illegal shape")

        op = core.CreateOperator(
            'ConstantFill',
            [],
            'out',
            shape=[],  # scalar
            value=2.0,
        )
        # NOTE(review): this second `exception = False` is dead — presumably
        # a leftover from a removed try/except; verify intent.
        exception = False
        self.assertTrue(workspace.RunOperatorOnce(op))
        self.assertEqual(workspace.FetchBlob('out'), [2.0])

    @given(**hu.gcs)
    @settings(deadline=10000)
    def test_int64_shape(self, gc, dc):
        """A dimension larger than int32 max must survive shape plumbing
        (the 0 leading dim keeps the blob allocation empty)."""
        large_dim = 2**31 + 1
        net = core.Net("test_shape_net")
        net.UniformFill(
            [],
            'out',
            shape=[0, large_dim],
            min=0.0,
            max=1.0,
        )
        self.assertTrue(workspace.CreateNet(net))
        self.assertTrue(workspace.RunNet(net.Name()))
        self.assertEqual(workspace.blobs['out'].shape, (0, large_dim))

    @given(
        shape=hu.dims().flatmap(
            lambda dims: hu.arrays(
                [dims], dtype=np.int64,
                elements=st.integers(min_value=0, max_value=20))),
        a=st.integers(min_value=0, max_value=100),
        b=st.integers(min_value=0, max_value=100),
        **hu.gcs)
    @settings(deadline=10000)
    def test_uniform_int_fill_op_blob_input(self, shape, a, b, gc, dc):
        """UniformIntFill with input_as_shape=1: shape/min/max come from
        blobs; b < a yields an empty (first-dim 0) output."""
        net = core.Net('test_net')
        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
            shape_blob = net.Const(shape, dtype=np.int64)
            a_blob = net.Const(a, dtype=np.int32)
            b_blob = net.Const(b, dtype=np.int32)
            uniform_fill = net.UniformIntFill(
                [shape_blob, a_blob, b_blob], 1, input_as_shape=1)
        workspace.RunNetOnce(net)
        blob_out = workspace.FetchBlob(uniform_fill)
        if b < a:
            # Empty range: output keeps the trailing dims but is empty.
            new_shape = shape[:]
            new_shape[0] = 0
            np.testing.assert_array_equal(new_shape, blob_out.shape)
        else:
            np.testing.assert_array_equal(shape, blob_out.shape)
            self.assertTrue((blob_out >= a).all())
            self.assertTrue((blob_out <= b).all())

    @given(**hu.gcs)
    def test_uniform_fill_using_arg(self, gc, dc):
        """UniformIntFill with shape/min/max given as op arguments."""
        net = core.Net('test_net')
        shape = [2**3, 5]
        # uncomment this to test filling large blob
        # shape = [2**30, 5]
        min_v = -100
        max_v = 100
        output_blob = net.UniformIntFill(
            [],
            ['output_blob'],
            shape=shape,
            min=min_v,
            max=max_v,
        )
        workspace.RunNetOnce(net)
        output_data = workspace.FetchBlob(output_blob)
        np.testing.assert_array_equal(shape, output_data.shape)
        min_data = np.min(output_data)
        max_data = np.max(output_data)
        self.assertGreaterEqual(min_data, min_v)
        self.assertLessEqual(max_data, max_v)
        # With 40 samples over [-100, 100] a constant output is
        # overwhelmingly unlikely.
        self.assertNotEqual(min_data, max_data)

    @serial.given(shape=st.sampled_from([
        [3, 3],
        [5, 5, 5],
        [7, 7, 7, 7],
    ]), **hu.gcs)
    def test_diagonal_fill_op_float(self, shape, gc, dc):
        """DiagonalFill (float) matches the numpy reference on each device."""
        value = 2.5
        op = core.CreateOperator(
            'DiagonalFill',
            [],
            'out',
            shape=shape,  # scalar
            value=value,
        )
        for device_option in dc:
            op.device_option.CopyFrom(device_option)
            # Check against numpy reference
            self.assertReferenceChecks(gc, op, [shape, value],
                                       _fill_diagonal)

    @given(**hu.gcs)
    def test_diagonal_fill_op_int(self, gc, dc):
        """DiagonalFill with an explicit INT32 dtype argument."""
        value = 2
        shape = [3, 3]
        op = core.CreateOperator(
            'DiagonalFill',
            [],
            'out',
            shape=shape,
            dtype=core.DataType.INT32,
            value=value,
        )
        # Check against numpy reference
        self.assertReferenceChecks(gc, op, [shape, value], _fill_diagonal)

    @serial.given(lengths=st.lists(st.integers(min_value=0, max_value=10),
                                   min_size=0,
                                   max_size=10),
                  **hu.gcs)
    def test_lengths_range_fill(self, lengths, gc, dc):
        """LengthsRangeFill emits range(l) for each length l, concatenated."""
        op = core.CreateOperator("LengthsRangeFill", ["lengths"],
                                 ["increasing_seq"])

        def _len_range_fill(lengths):
            sids = []
            for _, l in enumerate(lengths):
                sids.extend(list(range(l)))
            return (np.array(sids, dtype=np.int32), )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[np.array(lengths, dtype=np.int32)],
            reference=_len_range_fill)

    @given(**hu.gcs)
    def test_gaussian_fill_op(self, gc, dc):
        """GaussianFill runs on every device and produces non-zero output."""
        op = core.CreateOperator(
            'GaussianFill',
            [],
            'out',
            shape=[17, 3, 3],  # sample odd dimensions
            mean=0.0,
            std=1.0,
        )
        for device_option in dc:
            op.device_option.CopyFrom(device_option)
            # NOTE(review): adjacent string literals concatenate into one
            # assert message; verify the split was intentional upstream.
            assert workspace.RunOperatorOnce(op), \
                "GaussianFill op did not run " "successfully"
            blob_out = workspace.FetchBlob('out')
            assert np.count_nonzero(blob_out) > 0, \
                "All generated elements are " \
                "zeros. Is the random generator functioning correctly?"

    @given(**hu.gcs)
    def test_msra_fill_op(self, gc, dc):
        """MSRAFill runs on every device and produces non-zero output."""
        op = core.CreateOperator(
            'MSRAFill',
            [],
            'out',
            shape=[15, 5, 3],  # sample odd dimensions
        )
        for device_option in dc:
            op.device_option.CopyFrom(device_option)
            assert workspace.RunOperatorOnce(op), \
                "MSRAFill op did not run " "successfully"
            blob_out = workspace.FetchBlob('out')
            assert np.count_nonzero(blob_out) > 0, \
                "All generated elements are " \
                "zeros. Is the random generator functioning correctly?"

    @given(min=st.integers(min_value=0, max_value=5),
           range=st.integers(min_value=1, max_value=10),
           emb_size=st.sampled_from((10000, 20000, 30000)),
           dim_size=st.sampled_from((16, 32, 64)),
           **hu.gcs)
    @settings(deadline=None)
    def test_fp16_uniformfill_op(self, min, range, emb_size, dim_size, gc,
                                 dc):
        """Float16UniformFill: fp16 dtype, bounds respected, and sample
        mean/variance close to those of U(min, min+range)."""
        # NOTE: `min` and `range` shadow builtins — they are hypothesis
        # parameter names here, kept for signature compatibility.
        op = core.CreateOperator(
            'Float16UniformFill',
            [],
            'out',
            shape=[emb_size, dim_size],
            min=float(min),
            max=float(min + range),
        )
        for device_option in dc:
            op.device_option.CopyFrom(device_option)
            assert workspace.RunOperatorOnce(op), \
                "Float16UniformFill op did not run successfully"
            self.assertEqual(workspace.blobs['out'].shape,
                             (emb_size, dim_size))
            blob_out = workspace.FetchBlob('out')
            expected_type = "float16"
            # Moments of the uniform distribution on [min, min+range].
            expected_mean = min + range / 2.0
            expected_var = range * range / 12.0
            expected_min = min
            expected_max = min + range
            self.assertEqual(blob_out.dtype.name, expected_type)
            self.assertAlmostEqual(np.mean(blob_out, dtype=np.float32),
                                   expected_mean,
                                   delta=0.1)
            self.assertAlmostEqual(np.var(blob_out, dtype=np.float32),
                                   expected_var,
                                   delta=0.1)
            self.assertGreaterEqual(np.min(blob_out), expected_min)
            self.assertLessEqual(np.max(blob_out), expected_max)
class TestCrossEntropyOps(hu.HypothesisTestCase):
    """Tests for sigmoid-cross-entropy-with-logits and soft-label
    CrossEntropy, each checked against a numpy reference."""

    @given(
        inputs=st.lists(
            elements=st.integers(min_value=1, max_value=5),
            min_size=1,
            max_size=2,
            average_size=2,
        ).flatmap(
            # For a random 1-D or 2-D shape, draw logits (bounded away
            # from zero) and binary targets of the same shape.
            lambda shape: st.tuples(
                hu.arrays(
                    dims=shape,
                    elements=st.one_of(
                        st.floats(min_value=-1.0, max_value=-0.1),
                        st.floats(min_value=0.1, max_value=1.0),
                    )),
                hu.arrays(
                    dims=shape,
                    elements=st.sampled_from([0.0, 1.0]),
                ),
            )
        ),
    )
    def test_sigmoid_cross_entropy_with_logits(self, inputs):
        """Forward and gradient of SigmoidCrossEntropyWithLogits vs numpy."""
        logits, targets = inputs

        def sigmoid_xentr_logit_ref(logits, targets):
            # Elementwise xent averaged over the last axis.
            s = sigmoid_cross_entropy_with_logits(logits, targets)
            m = np.mean(s, axis=len(logits.shape) - 1)
            return (m, )

        def sigmoid_xentr_logit_grad_ref(g_out, outputs, fwd_inputs):
            fwd_logits, fwd_targets = fwd_inputs
            inner_size = fwd_logits.shape[-1]
            # d(xent)/d(logit) = sigmoid(logit) - target.
            m = fwd_targets - sigmoid(fwd_logits)
            g_in = -np.expand_dims(g_out, axis=-1) * m / inner_size
            return (g_in, None)

        op = core.CreateOperator(
            'SigmoidCrossEntropyWithLogits',
            ['logits', 'targets'],
            ['xentropy'])
        self.assertReferenceChecks(
            hu.cpu_do,
            op,
            [logits, targets],
            sigmoid_xentr_logit_ref,
            output_to_grad='xentropy',
            grad_reference=sigmoid_xentr_logit_grad_ref)

    @given(n=st.integers(2, 10), **hu.gcs_cpu_only)
    def test_soft_label_cross_entropy(self, n, gc, dc):
        """CrossEntropy with soft labels on a single row of n classes."""
        # Initialize X and add 1e-2 for numerical stability
        X = np.random.rand(n).astype(np.float32)
        X = X + 1e-2
        # Normalize to a probability distribution and add a batch axis.
        X = np.expand_dims((X / np.sum(X)), axis=0)
        # Initialize label
        label = np.random.rand(n).astype(np.float32)
        label = np.expand_dims((label / np.sum(label)), axis=0)

        # Reference implementation of cross entropy with soft labels
        def soft_label_xentr_ref(X, label):
            # -sum_i label_i * log(max(X_i, 1e-20)); the floor guards
            # against log(0).
            xent = [np.sum((-label[0][i] * np.log(max(X[0][i], 1e-20))
                            for i in range(len(X[0]))))]
            return (xent,)

        op = core.CreateOperator("CrossEntropy", ["X", "label"], ["Y"])
        # TODO(surya) Once CrossEntropyOp is ported to GPU, add the respective
        # tests to this unit test.
        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[X, label],
            reference=soft_label_xentr_ref,
        )
        self.assertGradientChecks(
            gc, op, [X, label], 0, [0], stepsize=1e-4, threshold=1e-2)
class TestRegularizer(LayersTestCase):
    """Tests for regularizer implementations: each builds the regularizer's
    ops into train nets, runs them, and compares against a numpy reference."""

    @given(X=hu.arrays(dims=[2, 5],
                       elements=hu.floats(min_value=-1.0, max_value=1.0)))
    def test_log_barrier(self, X):
        """LogBarrier: ON_LOSS yields 0.5 * sum(-log(clip(X))); the
        AFTER_OPTIMIZER pass projects the param onto [1e-9, inf)."""
        param = core.BlobReference("X")
        workspace.FeedBlob(param, X)
        train_init_net, train_net = self.get_training_nets()
        reg = regularizer.LogBarrier(1.0)
        output = reg(train_net, train_init_net, param,
                     by=RegularizationBy.ON_LOSS)
        reg(
            train_net,
            train_init_net,
            param,
            grad=None,
            by=RegularizationBy.AFTER_OPTIMIZER,
        )
        workspace.RunNetOnce(train_init_net)
        workspace.RunNetOnce(train_net)

        def ref(X):
            # (loss value, projected parameter) pair.
            return (
                np.array(np.sum(-np.log(np.clip(X, 1e-9, None))) *
                         0.5).astype(np.float32),
                np.clip(X, 1e-9, None),
            )

        for x, y in zip(workspace.FetchBlobs([output, param]), ref(X)):
            npt.assert_allclose(x, y, rtol=1e-3)

    @given(X=hu.arrays(dims=[2, 5],
                       elements=hu.floats(min_value=-1.0, max_value=1.0)),
           left_open=st.booleans(),
           right_open=st.booleans(),
           eps=hu.floats(min_value=1e-6, max_value=1e-4),
           ub=hu.floats(min_value=-1.0, max_value=1.0),
           lb=hu.floats(min_value=-1.0, max_value=1.0),
           **hu.gcs_cpu_only)
    def test_bounded_grad_proj(self, X, left_open, right_open, eps, ub, lb,
                               gc, dc):
        """BoundedGradientProjection clips the param into [lb, ub], shrunk
        by eps on each open side; it produces no loss output."""
        # Skip draws where the (possibly open) interval is empty.
        if ub - (eps if right_open else 0.) \
                < lb + (eps if left_open else 0.):
            return
        param = core.BlobReference("X")
        workspace.FeedBlob(param, X)
        train_init_net, train_net = self.get_training_nets()
        reg = regularizer.BoundedGradientProjection(
            lb=lb, ub=ub, left_open=left_open, right_open=right_open,
            epsilon=eps)
        output = reg(train_net, train_init_net, param,
                     by=RegularizationBy.ON_LOSS)
        reg(
            train_net,
            train_init_net,
            param,
            grad=None,
            by=RegularizationBy.AFTER_OPTIMIZER,
        )
        workspace.RunNetOnce(train_init_net)
        workspace.RunNetOnce(train_net)

        def ref(X):
            return np.clip(X, lb + (eps if left_open else 0.),
                           ub - (eps if right_open else 0.))

        # A pure projection regularizer contributes no loss blob.
        assert output is None
        npt.assert_allclose(workspace.blobs[param], ref(X), atol=1e-7)

    @given(output_dim=st.integers(1, 10),
           input_num=st.integers(3, 30),
           reg_weight=st.integers(0, 10))
    def test_group_l1_norm(self, output_dim, input_num, reg_weight):
        """
        1. create a weight blob
        2. create random group splits
        3. run group_l1_norm with the weight blob
        4. run equivalent np operations to calculate group l1 norm
        5. compare if the results from 3 and 4 are equal
        """
        def compare_reference(weight, group_boundaries, reg_lambda, output):
            # Split columns into groups; each group contributes
            # sqrt(group_width) * ||group||_2, summed and scaled.
            group_splits = np.hsplit(weight, group_boundaries[1:-1])
            l2_reg = np.sqrt([np.sum(np.square(g)) for g in group_splits])
            l2_normalized = np.multiply(
                l2_reg,
                np.array([np.sqrt(g.shape[1]) for g in group_splits]))
            result = np.multiply(np.sum(l2_normalized), reg_lambda)
            npt.assert_almost_equal(result, workspace.blobs[output],
                                    decimal=2)

        weight = np.random.rand(output_dim, input_num).astype(np.float32)
        feature_num = np.random.randint(low=1, high=input_num - 1)
        # Random sorted interior boundaries framed by 0 and input_num.
        group_boundaries = [0]
        group_boundaries = np.append(
            group_boundaries,
            np.sort(
                np.random.choice(range(1, input_num - 1), feature_num,
                                 replace=False)),
        )
        group_boundaries = np.append(group_boundaries, [input_num])
        split_info = np.diff(group_boundaries)
        weight_blob = core.BlobReference("weight_blob")
        workspace.FeedBlob(weight_blob, weight)
        train_init_net, train_net = self.get_training_nets()
        reg = regularizer.GroupL1Norm(reg_weight * 0.1,
                                      split_info.tolist())
        output = reg(train_net, train_init_net, weight_blob,
                     by=RegularizationBy.ON_LOSS)
        workspace.RunNetOnce(train_init_net)
        workspace.RunNetOnce(train_net)
        compare_reference(weight, group_boundaries, reg_weight * 0.1,
                          output)

    @given(param_dim=st.integers(10, 30),
           k=st.integers(5, 9),
           reg_weight=st.integers(0, 10))
    def test_l1_norm_trimmed(self, param_dim, k, reg_weight):
        """L1NormTrimmed sums |w| over all but the k largest magnitudes."""
        weight = np.random.rand(param_dim).astype(np.float32)
        weight_blob = core.BlobReference("weight_blob")
        workspace.FeedBlob(weight_blob, weight)
        train_init_net, train_net = self.get_training_nets()
        reg = regularizer.L1NormTrimmed(reg_weight * 0.1, k)
        output = reg(train_net, train_init_net, weight_blob,
                     by=RegularizationBy.ON_LOSS)
        workspace.RunNetOnce(train_init_net)
        workspace.RunNetOnce(train_net)
        result = np.sum(np.sort(
            np.absolute(weight))[:(param_dim - k)]) * reg_weight * 0.1
        npt.assert_almost_equal(result, workspace.blobs[output], decimal=2)

    @given(param_dim=st.integers(10, 30),
           k=st.integers(5, 9),
           l1=st.integers(0, 10),
           l2=st.integers(0, 10))
    def test_elastic_l1_norm_trimmed(self, param_dim, k, l1, l2):
        """ElasticNetL1NormTrimmed combines trimmed L1 with a full L2
        penalty."""
        weight = np.random.rand(param_dim).astype(np.float32)
        weight_blob = core.BlobReference("weight_blob")
        workspace.FeedBlob(weight_blob, weight)
        train_init_net, train_net = self.get_training_nets()
        reg = regularizer.ElasticNetL1NormTrimmed(l1 * 0.1, l2 * 0.1, k)
        output = reg(train_net, train_init_net, weight_blob,
                     by=RegularizationBy.ON_LOSS)
        workspace.RunNetOnce(train_init_net)
        workspace.RunNetOnce(train_net)
        l1_norm = np.sum(np.sort(np.absolute(weight))[:(param_dim - k)])
        l2_norm = np.sum(np.square(weight))
        result = l1_norm * l1 * 0.1 + l2_norm * l2 * 0.1
        npt.assert_almost_equal(result, workspace.blobs[output], decimal=2)
class TestUtilityOps(hu.HypothesisTestCase):
    """Tests for assorted utility operators: Slice, Transpose, NanCheck,
    Max (elementwise), LengthsGather, and Size."""

    @given(X=hu.tensor(), neg=st.booleans(), **hu.gcs)
    def test_slice(self, X, neg, gc, dc):
        """Slice a random dimension of X and compare against numpy slicing.

        Note: the `neg` hypothesis parameter is drawn but currently unused.
        """
        X = X.astype(dtype=np.float32)
        dim = random.randint(0, X.ndim - 1)
        slice_start = random.randint(0, X.shape[dim] - 1)
        slice_end = random.randint(slice_start, X.shape[dim] - 1)
        # starts/ends select [slice_start, slice_end) on `dim` and the full
        # extent (-1 == "to the end") on every other dimension.
        starts = np.array([0] * X.ndim).astype(np.int32)
        ends = np.array([-1] * X.ndim).astype(np.int32)
        starts[dim] = slice_start
        ends[dim] = slice_end
        op = core.CreateOperator(
            "Slice", ["X", "starts", "ends"], ["Y"], device_option=gc
        )

        def slice_ref(X, starts, ends):
            slc = [slice(None)] * X.ndim
            slc[dim] = slice(slice_start, slice_end)
            # Index with a tuple: indexing an ndarray with a *list* of
            # slices was deprecated in NumPy 1.15 and later removed.
            return [X[tuple(slc)]]

        self.assertReferenceChecks(gc, op, [X, starts, ends], slice_ref)
        self.assertDeviceChecks(dc, op, [X, starts, ends], [0])

    @given(dtype=st.sampled_from([np.float32, np.int32, np.int64]),
           ndims=st.integers(min_value=1, max_value=5),
           seed=st.integers(min_value=0, max_value=65536),
           null_axes=st.booleans(),
           engine=st.sampled_from(['CUDNN', None]),
           **hu.gcs)
    def test_transpose(self, dtype, ndims, seed, null_axes, engine, gc, dc):
        """Transpose with either no axes arg (full reversal) or a random
        permutation; check against np.transpose."""
        dims = (np.random.rand(ndims) * 16 + 1).astype(np.int32)
        X = (np.random.rand(*dims) * 16).astype(dtype)

        if null_axes:
            axes = None
            op = core.CreateOperator(
                "Transpose", ["input"], ["output"], engine=engine)
        else:
            np.random.seed(int(seed))
            axes = [int(v) for v in list(np.random.permutation(X.ndim))]
            op = core.CreateOperator(
                "Transpose", ["input"], ["output"], axes=axes, engine=engine)

        def transpose_ref(x, axes):
            return (np.transpose(x, axes),)

        self.assertReferenceChecks(gc, op, [X, axes], transpose_ref)

    @given(m=st.integers(5, 10),
           n=st.integers(5, 10),
           o=st.integers(5, 10),
           nans=st.booleans(),
           **hu.gcs)
    def test_nan_check(self, m, n, o, nans, gc, dc):
        """NanCheck must pass clean tensors through and raise on NaN input;
        likewise for its gradient pass."""
        other = np.array([1, 2, 3]).astype(np.float32)
        X = np.random.rand(m, n, o).astype(np.float32)
        if nans:
            # Poison one random element so the op is expected to fail.
            x_nan = np.random.randint(0, m)
            y_nan = np.random.randint(0, n)
            z_nan = np.random.randint(0, o)
            X[x_nan, y_nan, z_nan] = float('NaN')

        def nan_reference(X, Y):
            # Reference output is only meaningful on the NaN-free path; the
            # NaN path is expected to raise before comparison.
            if not np.isnan(X).any():
                return [X]
            else:
                return [np.array([])]

        op = core.CreateOperator(
            "NanCheck",
            ["X", "other"],
            ["Y"]
        )

        try:
            self.assertReferenceChecks(
                device_option=gc,
                op=op,
                inputs=[X, other],
                reference=nan_reference,
            )
            if nans:
                self.assertTrue(False, "Did not fail when presented with NaN!")
        except RuntimeError:
            self.assertTrue(nans, "No NaNs but failed")

        try:
            self.assertGradientChecks(
                device_option=gc,
                op=op,
                inputs=[X],
                outputs_to_check=0,
                outputs_with_grads=[0],
            )
            if nans:
                self.assertTrue(False, "Did not fail when gradient had NaN!")
        except RuntimeError:
            # Expected on the NaN path; nothing further to assert here.
            pass

    @given(n=st.integers(4, 5), m=st.integers(6, 7),
           d=st.integers(2, 3), **hu.gcs)
    def test_elementwise_max(self, n, m, d, gc, dc):
        """Three-input elementwise Max against a chained np.maximum."""
        X = np.random.rand(n, m, d).astype(np.float32)
        Y = np.random.rand(n, m, d).astype(np.float32)
        Z = np.random.rand(n, m, d).astype(np.float32)

        def max_op(X, Y, Z):
            return [np.maximum(np.maximum(X, Y), Z)]

        op = core.CreateOperator(
            "Max",
            ["X", "Y", "Z"],
            ["mx"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[X, Y, Z],
            reference=max_op,
        )

    @given(
        inputs=hu.lengths_tensor(max_value=30).flatmap(
            lambda pair: st.tuples(
                st.just(pair[0]),
                st.just(pair[1]),
                hu.dims(max_value=len(pair[1])),
            )
        ).flatmap(
            lambda tup: st.tuples(
                st.just(tup[0]),
                st.just(tup[1]),
                hu.arrays(
                    tup[2], dtype=np.int32,
                    elements=st.integers(
                        min_value=0, max_value=len(tup[1]) - 1)),
            )
        ),
        **hu.gcs_cpu_only)
    def test_lengths_gather(self, inputs, gc, dc):
        """LengthsGather picks whole length-delimited segments of `items`
        selected by `indices`; compare with a cumsum-based reference."""
        items = inputs[0]
        lengths = inputs[1]
        indices = inputs[2]

        def lengths_gather_op(items, lengths, indices):
            # ends[i] is one past segment i; each selected segment spans
            # items[ends[i] - lengths[i] : ends[i]].
            ends = np.cumsum(lengths)
            return [np.concatenate(
                list(items[ends[i] - lengths[i]:ends[i]] for i in indices))]

        op = core.CreateOperator(
            "LengthsGather",
            ["items", "lengths", "indices"],
            ["output"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[items, lengths, indices],
            reference=lengths_gather_op,
        )

    @given(**hu.gcs)
    def test_size_op(self, gc, dc):
        """Size returns the total element count of the input tensor."""
        X = np.array([[1, 2], [3, 4]]).astype(np.float32)

        def size_op(tensor):
            return [np.prod(tensor.shape)]

        op = core.CreateOperator(
            "Size",
            ["X"],
            ["output"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[X],
            reference=size_op,
        )