def test_graph_inplace_gpu(test_case):
    x = flow.randn(
        10, 10, placement=flow.placement("cuda", {0: [0, 1]}), sbp=flow.sbp.split(1)
    )
    y = flow.ones(
        10, placement=flow.placement("cuda", {0: [0, 1]}), sbp=flow.sbp.broadcast
    )
    _test_graph_lazy_inplace(test_case, x, y)
def test_graph_inplace_cpu(test_case):
    x = flow.randn(
        10, 10, placement=flow.placement("cpu", ranks=[0, 1]), sbp=flow.sbp.split(1)
    )
    y = flow.ones(
        10, placement=flow.placement("cpu", ranks=[0, 1]), sbp=flow.sbp.broadcast
    )
    _test_graph_lazy_inplace(test_case, x, y)
def test_new_tensor_global_mode_with_default_args(test_case):
    placement = flow.placement(type="cpu", ranks=[0, 1])
    sbp = flow.sbp.split(0)
    tensor = flow.randn(4, 4, placement=placement, sbp=sbp)
    data = [[1, 2], [3, 4]]
    new_tensor = tensor.new_tensor(data)
    test_case.assertEqual(new_tensor.dtype, tensor.dtype)
    test_case.assertEqual(new_tensor.placement, placement)
    test_case.assertEqual(new_tensor.sbp, (sbp,))
def test_glu_dim_even_runtime_error(test_case):
    with test_case.assertRaises(Exception) as context:
        x = flow.randn(2, 3)
        m = flow.nn.GLU()
        y = m(x)
    test_case.assertTrue(
        "Halving dimension must be even, but dimension 1 is size 3"
        in str(context.exception)
    )
def test_glu_dim_index_error(test_case):
    with test_case.assertRaises(Exception) as context:
        x = flow.randn(2, 4)
        m = flow.nn.GLU(dim=3)
        y = m(x)
    test_case.assertTrue(
        "Dimension out of range (expected to be in range of [-2, 1], but got 3)"
        in str(context.exception)
    )
def noisy_top_k_gating(self, x, train, noise_epsilon=1e-2):
    """Noisy top-k gating.

    See paper: https://arxiv.org/abs/1701.06538.

    Args:
        x: input Tensor with shape [batch_size, input_size]
        train: a boolean - we only add noise at training time.
        noise_epsilon: a float

    Returns:
        gates: a Tensor with shape [batch_size, num_experts]
        load: a Tensor with shape [num_experts]
    """
    clean_logits = oneflow.matmul(x, self.w_gate)
    if self.noisy_gating:
        raw_noise_stddev = oneflow.matmul(x, self.w_noise)
        noise_stddev = (self.softplus(raw_noise_stddev) + noise_epsilon) * train
        # noisy_logits = clean_logits + (torch.randn(clean_logits.size()) * noise_stddev)
        # TODO, fix this after torch randn argument fixed
        noisy_logits = clean_logits + (
            flow.randn(
                clean_logits.size()[0],
                clean_logits.size()[1],
                device=clean_logits.device,
            )
            * noise_stddev
        )
        logits = noisy_logits
    else:
        logits = clean_logits

    # calculate topk + 1 that will be needed for the noisy gates
    top_logits, top_indices = logits.topk(min(self.k + 1, self.num_experts), dim=1)
    top_k_logits = top_logits[:, : self.k]
    top_k_indices = top_indices[:, : self.k]
    top_k_gates = self.softmax(top_k_logits)

    top_k_logits = top_k_logits.to(logits.device)
    top_indices = top_indices.to(logits.device)
    top_logits = top_logits.to(logits.device)

    zeros = flow.zeros(
        logits.shape, dtype=logits.dtype, requires_grad=True, device=logits.device
    )
    gates = oneflow.scatter(zeros, 1, top_k_indices, top_k_gates)

    if self.noisy_gating and self.k < self.num_experts:
        load = (
            self._prob_in_top_k(clean_logits, noisy_logits, noise_stddev, top_logits)
        ).sum(0)
    else:
        load = self._gates_to_load(gates)
    return gates, load
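# --- Hedged usage sketch (not part of the original file) ---
# A minimal sketch of how the (gates, load) returned by noisy_top_k_gating might
# feed the load-balancing auxiliary loss described in the paper
# (https://arxiv.org/abs/1701.06538). The names `cv_squared`, `moe_module`, and
# `moe_aux_loss` are hypothetical helpers introduced here for illustration; the
# sketch assumes `import oneflow as flow` as in the surrounding code.
import oneflow as flow


def cv_squared(x, eps=1e-10):
    # Squared coefficient of variation; close to zero when the per-expert
    # values are well balanced.
    x = x.float()
    if x.shape[0] == 1:
        return flow.zeros(1, dtype=x.dtype, device=x.device)
    return x.var() / (x.mean() ** 2 + eps)


def moe_aux_loss(moe_module, x, loss_coef=1e-2):
    # Penalize imbalance in both expert importance (sum of gate values per
    # expert) and expert load, as suggested in the paper.
    gates, load = moe_module.noisy_top_k_gating(x, train=True)
    importance = gates.sum(0)
    return loss_coef * (cv_squared(importance) + cv_squared(load))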
def test_hard_shrink_inplace_runtime_error(test_case):
    with test_case.assertRaises(Exception) as context:
        x = flow.randn(2)
        x.requires_grad = True
        m = flow.nn.Hardshrink(inplace=True)
        y = m(x)
    test_case.assertTrue(
        "a leaf Tensor that requires grad is being used in an in-place operation"
        in str(context.exception)
    )
def test_soft_shrink_alpha_runtime_error(test_case):
    with test_case.assertRaises(Exception) as context:
        x = flow.randn(2)
        x.requires_grad = True
        m = flow.nn.Softshrink(-0.1)
        y = m(x)
    test_case.assertTrue(
        "alpha must be greater or equal to 0, but found to be -0.1."
        in str(context.exception)
    )
def _test_graph_buffer_limit(test_case):
    class StageLayerModule(flow.nn.Module):
        def __init__(self):
            super().__init__()
            self.linear1 = flow.nn.Linear(10, 8, False)
            self.linear2 = flow.nn.Linear(8, 10, False)
            flow.nn.init.constant_(self.linear1.weight, 0.023)
            flow.nn.init.constant_(self.linear2.weight, 1.23)

        def forward(self, x):
            out0 = self.linear1(x)
            out0 = out0 + 1.0
            out0 = out0 * 2.0
            out1 = self.linear2(out0)
            return out1

    P0 = flow.placement("cuda", {0: [0]})
    P1 = flow.placement("cuda", {0: [1]})
    PT = flow.placement("cuda", {0: [0, 1]})
    B = flow.sbp.broadcast

    class PipelineModule(flow.nn.Module):
        def __init__(self):
            super().__init__()
            self.layer_0 = StageLayerModule()
            self.layer_1 = StageLayerModule()
            self.layer_0.to_consistent(P0, B)
            self.layer_1.to_consistent(P1, B)

        def forward(self, x):
            # stage 0
            in0 = x.to_consistent(P0, B)
            out0 = self.layer_0(in0)
            # stage 1
            in1 = out0.to_consistent(P1, B)
            out1 = self.layer_1(in1)
            return out1

    pp_m = PipelineModule()
    pp_m.eval()

    class PipelineGraph(flow.nn.Graph):
        def __init__(self):
            super().__init__()
            self.pp_m = pp_m

        def build(self, x):
            return self.pp_m(x)

    pp_g = PipelineGraph()

    for i in range(500):
        x = flow.randn(16, 10)
        x = x.to_consistent(P0, B)
        out = pp_g(x)
def test_parital_fc(test_case):
    p = flow.env.all_device_placement("cuda")
    w = flow.randn(50000, 128, placement=p, sbp=flow.sbp.broadcast)
    label = flow.randint(0, 50000, (512,), placement=p, sbp=flow.sbp.broadcast)
    num_sample = 5000
    out = flow.distributed_partial_fc_sample(w, label, num_sample)
    test_case.assertTrue(out[0].shape == flow.Size([512]))
    test_case.assertTrue(out[1].shape == flow.Size([5000]))
    test_case.assertTrue(out[2].shape == flow.Size([5000, 128]))
def do_bias_add_dropout_graph(test_case, with_cuda, prob):
    x = flow.randn(2, 3, 4, 5)
    bias = flow.randn(5)
    dropout = flow.nn.Dropout(p=prob)
    if with_cuda:
        x = x.cuda()
        bias = bias.to("cuda")
        dropout.to("cuda")
    eager_res = dropout(flow._C.bias_add(x, bias, axis=3))

    class GraphToRun(flow.nn.Graph):
        def __init__(self):
            super().__init__()
            self.dropout = dropout

        def build(self, x, bias):
            return self.dropout(flow._C.bias_add(x, bias, axis=3))

    graph_to_run = GraphToRun()
    lazy_res = graph_to_run(x, bias)
    test_case.assertTrue(np.array_equal(eager_res.numpy(), lazy_res.numpy()))
def do_nhwc_bias_add(test_case, with_cuda):
    a = flow.randn(2, 3, 4, 5)
    b = flow.randn(3)
    if with_cuda:
        a = a.cuda()
        b = b.cuda()
    eager_bias_add_res = flow._C.bias_add(a, b, axis=1)

    class GraphToRun(flow.nn.Graph):
        def __init__(self):
            super().__init__()

        def build(self, a, b):
            return flow._C.bias_add(a, b, axis=1)

    graph_to_run = GraphToRun()
    lazy_bias_add_res = graph_to_run(a, b)
    test_case.assertTrue(
        np.allclose(
            eager_bias_add_res.numpy(), lazy_bias_add_res.numpy(), rtol=1e-5, atol=1e-5
        )
    )
def test_parital_fc(test_case):
    p = flow.env.all_device_placement("cuda")
    w = flow.randn(
        50000, 128, placement=p, sbp=flow.sbp.broadcast, requires_grad=True
    )
    label = flow.randint(0, 50000, (512,), placement=p, sbp=flow.sbp.broadcast)
    num_sample = 5000
    out = flow.distributed_partial_fc_sample(w, label, num_sample)
    test_case.assertTrue(out[0].shape == flow.Size([512]))
    test_case.assertTrue(out[1].shape == flow.Size([5000]))
    test_case.assertTrue(out[2].shape == flow.Size([5000, 128]))
    # test gradient function
    sample_weight = out[2]
    sample_weight.sum().backward()
def test_block_with_para_dict_container(test_case):
    dict_of_p = {
        "0": flow.nn.Parameter(flow.randn(10, 3)),
        "1": flow.nn.Parameter(flow.randn(10, 10)),
    }

    class ParaDictModule(flow.nn.Module):
        def __init__(self):
            super().__init__()
            self.params = flow.nn.ParameterDict(dict_of_p)

        def forward(self, x):
            x = flow._C.matmul(x, self.params["0"])
            return x

    class ParaDictGraph(flow.nn.Graph):
        def __init__(self):
            super().__init__()
            self.params = flow.nn.ParameterDict(dict_of_p)

        def build(self, x):
            x = flow._C.matmul(x, self.params["0"])
            return x

    para_dict_m = ParaDictModule()
    para_dict_g = ParaDictGraph()
    # print(para_dict_g)
    input = flow.tensor(np.random.randn(4, 10), dtype=flow.float32)
    output_m = para_dict_m(input)
    # print(output_m)
    output_g = para_dict_g(input)
    # print(para_dict_g)
    test_case.assertTrue(np.array_equal(output_m.numpy(), output_g.numpy()))
def test_generator_setstate(test_case):
    cpu_gen = flow.default_generator
    flow.randn(100, 100, dtype=flow.float32, device="cpu", generator=cpu_gen)
    if not os.getenv("ONEFLOW_TEST_CPU_ONLY"):
        cuda_gen = flow.Generator("cuda")
        flow.randn(100, 100, dtype=flow.float32, device="cuda", generator=cuda_gen)
    state = cpu_gen.get_state()
    flow.randn(100, 100, dtype=flow.float32, device="cpu", generator=cpu_gen)
    if not os.getenv("ONEFLOW_TEST_CPU_ONLY"):
        cuda_state = cuda_gen.get_state()
        flow.randn(100, 100, dtype=flow.float32, device="cuda", generator=cuda_gen)
    new_state = cpu_gen.get_state()
    test_case.assertTrue(not np.allclose(new_state.numpy(), state.numpy()))
    cpu_gen.set_state(state)
    new_state = cpu_gen.get_state()
    test_case.assertTrue(np.allclose(new_state.numpy(), state.numpy()))
    if not os.getenv("ONEFLOW_TEST_CPU_ONLY"):
        new_cuda_state = cuda_gen.get_state()
        test_case.assertTrue(
            not np.allclose(new_cuda_state.numpy(), cuda_state.numpy())
        )
        cuda_gen.set_state(cuda_state)
        new_cuda_state = cuda_gen.get_state()
        test_case.assertTrue(np.allclose(new_cuda_state.numpy(), cuda_state.numpy()))
def do_eliminate_transpose(test_case, with_cuda):
    x = flow.randn(2, 3, 4, 5)
    if with_cuda:
        x = x.cuda()
    eager_res = flow.permute(flow.permute(x, (0, 2, 3, 1)), (0, 3, 1, 2))

    class GraphToRun(flow.nn.Graph):
        def __init__(self):
            super().__init__()

        def build(self, x):
            return flow.permute(flow.permute(x, (0, 2, 3, 1)), (0, 3, 1, 2))

    graph_to_run = GraphToRun()
    lazy_res = graph_to_run(x)
    test_case.assertTrue(
        np.allclose(eager_res.numpy(), lazy_res.numpy(), rtol=1e-5, atol=1e-5)
    )
def test_2d_split(test_case):
    pred = flow.randn(8, 10)
    label = flow.randint(0, 10, (8,))
    placement = flow.placement(
        "cuda", np.array(range(flow.env.get_world_size())).reshape(2, 2)
    )
    pred = pred.to_global(
        placement=placement, sbp=[flow.sbp.broadcast(), flow.sbp.broadcast()]
    )
    label = label.to_global(
        placement=placement, sbp=[flow.sbp.broadcast(), flow.sbp.broadcast()]
    )
    _compare_with_nn_cross_entropy_loss(
        test_case,
        pred,
        label,
        [flow.sbp.split(0), flow.sbp.split(1)],
        [flow.sbp.split(0), flow.sbp.broadcast()],
    )
def do_lenet(test_case, with_cuda):
    x = flow.randn(2, 3, 32, 32)
    lenet = LeNet()
    if with_cuda:
        x = x.cuda()
        lenet.to("cuda")
    eager_res = lenet(x)

    class GraphToRun(flow.nn.Graph):
        def __init__(self):
            super().__init__()
            self.lenet = lenet

        def build(self, x):
            return self.lenet(x)

    graph_to_run = GraphToRun()
    lazy_res = graph_to_run(x)
    test_case.assertTrue(
        np.allclose(eager_res.numpy(), lazy_res.numpy(), rtol=1e-5, atol=1e-5)
    )
def _test_fuse_conv_bn(test_case):
    data = flow.randn(1, 3, 224, 224)
    model = resnet50(pretrained=True, progress=True)
    model.eval()
    eager_res = model(data)

    class Resnet50Graph(nn.Graph):
        def __init__(self):
            super().__init__()
            self.model = model

        def build(self, *input):
            return self.model(*input)

    graph = Resnet50Graph()
    lazy_res = graph(data)
    test_case.assertTrue(
        np.allclose(eager_res.numpy(), lazy_res.numpy(), rtol=1e-5, atol=1e-5)
    )
def do_nhwc_maxpool_2d(test_case, with_cuda, with_return_induces):
    x = flow.randn(1, 4, 4, 4)
    maxpool_2d = flow.nn.MaxPool2d(
        kernel_size=3, padding=1, stride=3, return_indices=with_return_induces
    )
    if with_cuda:
        x = x.cuda()
        maxpool_2d.to("cuda")
    eager_maxpool_2d_res = maxpool_2d(x)

    class GraphToRun(flow.nn.Graph):
        def __init__(self):
            super().__init__()
            self.m = maxpool_2d

        def build(self, x):
            return self.m(x)

    graph_to_run = GraphToRun()
    lazy_maxpool_2d_res = graph_to_run(x)
    if with_return_induces:
        test_case.assertTrue(
            np.allclose(
                eager_maxpool_2d_res[0].numpy(),
                lazy_maxpool_2d_res[0].numpy(),
                rtol=1e-5,
                atol=1e-5,
            )
        )
    else:
        test_case.assertTrue(
            np.allclose(
                eager_maxpool_2d_res.numpy(),
                lazy_maxpool_2d_res.numpy(),
                rtol=1e-5,
                atol=1e-5,
            )
        )
def do_nhwc_conv(test_case, with_cuda, with_bias):
    x = flow.randn(2, 3, 4, 5)
    conv = flow.nn.Conv2d(3, 4, 2, 1, bias=with_bias)
    if with_cuda:
        x = x.cuda()
        conv.to("cuda")
    eager_conv_x = conv(x)

    class GraphToRun(flow.nn.Graph):
        def __init__(self):
            super().__init__()
            self.conv = conv

        def build(self, x):
            return self.conv(x)

    graph_to_run = GraphToRun()
    lazy_conv_x = graph_to_run(x)
    test_case.assertTrue(
        np.allclose(eager_conv_x.numpy(), lazy_conv_x.numpy(), rtol=1e-5, atol=1e-5)
    )
def test_global_naive(test_case):
    placement = flow.placement("cpu", ranks=[0])
    sbp = (flow.sbp.broadcast,)
    x = flow.randn(16, 16, placement=placement, sbp=sbp)
    test_case.assertEqual(x.sbp, sbp)
    test_case.assertEqual(x.placement, placement)
def test():
    net = EfficientNetB0()
    x = oneflow.randn(2, 3, 32, 32)
    y = net(x)
    print(y.shape)
def test():
    net = GoogLeNet()
    x = oneflow.randn(1, 3, 32, 32)
    y = net(x)
    print(y.size())
def test_local(test_case):
    pred = flow.randn(8, 10).to("cuda")
    label = flow.randint(0, 10, (8,)).to("cuda")
    _compare_with_nn_cross_entropy_loss(test_case, pred, label)
def test():
    net = MobileNetV2()
    x = oneflow.randn(2, 3, 32, 32)
    y = net(x)
    print(y.size())
def test():
    net = ShuffleNetV2(net_size=0.5)
    x = oneflow.randn(3, 3, 32, 32)
    y = net(x)
    print(y.shape)
def _test_randn_with_flow_size(test_case, device, shape):
    y1 = flow.randn(flow.Size(shape), device=flow.device(device))
    y2 = flow.randn(flow.Size(shape), device=flow.device(device))
    test_case.assertTrue(not np.array_equal(y1.numpy(), y2.numpy()))
    test_case.assertTrue(shape == y1.shape)
def _test_randn_tuple_shape(test_case, device, shape):
    y1 = flow.randn(shape, device=flow.device(device))
    y2 = flow.randn(shape, device=flow.device(device))
    test_case.assertTrue(not np.array_equal(y1.numpy(), y2.numpy()))
    test_case.assertTrue(shape == y1.shape)
def _test_backward(test_case, device, shape):
    x = flow.randn(*shape, device=flow.device(device), requires_grad=True)
    y = x.sum()
    y.backward()
    test_case.assertTrue(
        np.allclose(np.ones(shape), x.grad.numpy(), atol=1e-4, rtol=1e-4)
    )