def test_embedding_gradient_shuffle(test_case):
    """Sweep quantization, fp16, and embedding-size settings for the gradient-shuffle test."""
    arg_dict = OrderedDict(
        [
            ("enable_quantize", [True, False]),
            ("fp16", [True, False]),
            ("embedding_size", [128, 17]),
        ]
    )
    for params in GenArgDict(arg_dict):
        _test_embedding_gradient_shuffle(test_case, **params)
def test_outputs_int32(test_case):
    """Run the int32-output training test on every configured device (no-grad path)."""
    arg_dict = OrderedDict(
        [
            ("device", test_device),
            ("output", [train_int32]),
            ("requires_grad", [False]),
        ]
    )
    for params in GenArgDict(arg_dict):
        train(test_case, train_x, **params)
def test_randperm_graph(test_case):
    """Exercise graph-mode randperm over sizes, 1d/2d placements, dtypes, and SBPs.

    Iterates the cartesian product produced by GenArgDict and, for each
    combination, every SBP valid for a 1-d tensor (partial_sum excluded).
    """
    arg_dict = OrderedDict()
    # list(range(...)) instead of an identity comprehension (ruff C416).
    arg_dict["N"] = list(range(10, 50, 10))
    arg_dict["placement"] = [
        # 1d
        flow.placement("cpu", ranks=[0, 1]),
        flow.placement("cuda", ranks=[0, 1]),
        # 2d
        flow.placement("cpu", ranks=[[0, 1]]),
        flow.placement("cuda", ranks=[[0, 1]]),
    ]
    arg_dict["dtype"] = [
        flow.uint8,
        flow.int8,
        flow.int32,
        flow.int64,
        flow.float32,
        flow.float64,
    ]
    for args in GenArgDict(arg_dict):
        N = args["N"]
        placement = args["placement"]
        dtype = args["dtype"]
        for sbp in all_sbp(placement, max_dim=1, except_partial_sum=True):
            _test_graph_randperm(test_case, N, placement, sbp, dtype)
def test_gru(test_case):
    """Sweep GRU hyper-parameters across every placement and valid input SBP."""
    arg_dict = OrderedDict(
        [
            ("input_size", [1]),
            ("hidden_size", [1]),
            ("num_layers", [1]),
            ("bias", [True, False]),
            ("batch_first", [True, False]),
            ("dropout", [0]),
            ("bidirectional", [True, False]),
        ]
    )
    module_sbp = flow.sbp.broadcast
    for params in GenArgDict(arg_dict):
        for placement in all_placement():
            for in_sbp in all_sbp(placement, max_dim=3, valid_split_axis=1):
                _test_gru_impl(test_case, placement, module_sbp, in_sbp, **params)
def test_constant_graph(test_case):
    """Check graph-mode constant factories (ones/zeros/new_zeros) over shapes, placements, SBPs."""
    arg_dict = OrderedDict()
    arg_dict["func"] = ["ones", "zeros", "new_zeros"]
    arg_dict["shape"] = [(8,), (8, 8), (8, 8, 8)]
    arg_dict["placement"] = [
        # 1d
        flow.placement("cpu", ranks=[0, 1]),
        flow.placement("cuda", ranks=[0, 1]),
        # 2d
        flow.placement("cpu", ranks=[[0, 1]]),
        flow.placement("cuda", ranks=[[0, 1]]),
    ]
    for args in GenArgDict(arg_dict):
        func = args["func"]
        shape = args["shape"]
        placement = args["placement"]
        for sbp in all_sbp(placement, max_dim=len(shape), except_partial_sum=True):
            _test_graph_constant(test_case, func, shape, placement, sbp)
def test_rand_graph(test_case):
    """Check graph-mode rand over each shape, 1d/2d placement, and valid SBP."""
    arg_dict = OrderedDict()
    arg_dict["shape"] = [(8,), (8, 8), (8, 8, 8)]
    arg_dict["placement"] = [
        # 1d
        flow.placement("cpu", ranks=[0, 1]),
        flow.placement("cuda", ranks=[0, 1]),
        # 2d
        flow.placement("cpu", ranks=[[0, 1]]),
        flow.placement("cuda", ranks=[[0, 1]]),
    ]
    for args in GenArgDict(arg_dict):
        shape = args["shape"]
        placement = args["placement"]
        for sbp in all_sbp(placement, max_dim=len(shape), except_partial_sum=True):
            _test_graph_rand(test_case, shape, placement, sbp)
def test_embedding_shuffle(test_case):
    """Run embedding shuffle under each dtype and quantization setting."""
    arg_dict = OrderedDict(
        [
            ("dtype", [flow.float32, flow.float16]),
            ("enable_quantize", [True, False]),
        ]
    )
    for params in GenArgDict(arg_dict):
        _test_embedding_shuffle(test_case, **params)
def test_fused_scale_tril(test_case):
    """Sweep shape, diagonal, and scale for the fused scale-tril kernel.

    NOTE(review): another test_fused_scale_tril appears later in this source;
    if both live in one module the later definition shadows this one — confirm.
    """
    arg_dict = OrderedDict(
        [
            ("shape", [(5, 5), (4, 6)]),
            ("diagonal", [-1, 0]),
            ("scale", [-2.3, 2.0]),
        ]
    )
    for params in GenArgDict(arg_dict):
        _test_fused_scale_tril(test_case, **params)
def test_local_empty(test_case):
    """Run local empty-tensor creation over shapes, dtypes, devices, and grad settings."""
    arg_dict = OrderedDict(
        [
            ("shape", [(2, 3), (2, 3, 4), (2, 3, 4, 5)]),
            ("dtype", [flow.float32, flow.float16, flow.int32]),
            ("device", ["cpu", "cuda"]),
            ("requires_grad", [True, False]),
        ]
    )
    for params in GenArgDict(arg_dict):
        _test_local_empty(test_case, **params)
def test_fused_dot_feature_interaction(test_case):
    """Sweep interaction/concat/padding/dtype options for fused dot feature interaction."""
    arg_dict = OrderedDict(
        [
            ("self_interaction", [True, False]),
            ("output_concat", [False, True]),
            ("output_padding", [0, 1]),
            ("dtype", [flow.float16, flow.float32]),
        ]
    )
    for params in GenArgDict(arg_dict):
        _test_fused_dot_feature_interaction(test_case, **params)
def test_fused_dot_feature_interaction_pooling_sum(test_case):
    """Sweep dtype, feature-dim groupings, and embedding sizes for pooling-sum interaction."""
    arg_dict = OrderedDict(
        [
            ("dtype", [flow.float16, flow.float32]),
            ("feature_dims", [[39], [13, 26], [1, 10, 3]]),
            ("embedding_size", [127, 128, 16, 11, 12, 110]),
        ]
    )
    for params in GenArgDict(arg_dict):
        _test_fused_dot_feature_interaction_pooling_sum(test_case, **params)
def test_one_embedding_adagrad(test_case):
    """Compare one-embedding Adagrad against the numpy reference across hyper-parameters."""
    arg_dict = OrderedDict(
        [
            ("weight_decay", [0, 0.1]),
            ("lr_decay", [0, 0.1]),
            ("scale", [1, 0.1]),
            ("learning_rate", [0.3, 1.5]),
            ("train_iters", [10]),
        ]
    )
    for params in GenArgDict(arg_dict):
        compare_with_numpy_adagrad(test_case, **params)
def test_fused_scale_tril(test_case):
    """Sweep shape, diagonal, fill value, scale, and dtype for fused scale-tril.

    NOTE(review): an earlier test_fused_scale_tril appears in this source;
    if both live in one module this definition shadows it — confirm.
    """
    arg_dict = OrderedDict(
        [
            ("shape", [(5, 5), (4, 6)]),
            ("diagonal", [-1, 0, 1]),
            ("fill_value", [-1, 0, 1]),
            ("scale", [-2.3, 0.7, 2]),
            ("dtype", [flow.float32]),
        ]
    )
    for params in GenArgDict(arg_dict):
        _test_fused_scale_tril(test_case, **params)
def test_coin_flip(test_case):
    """Run the CoinFlip op over batch sizes, seeds, and probabilities (CPU only)."""
    arg_dict = OrderedDict(
        [
            ("batch_size", [1, 2, 50]),
            ("random_seed", [None, 1, -1]),
            ("probability", [0.0, 0.5, 1.0]),
        ]
    )
    # TODO: CoinFlip support cuda kernel
    # arg_dict["device"] = ["cpu", "cuda"]
    arg_dict["device"] = ["cpu"]
    for params in GenArgDict(arg_dict):
        _test_coin_flip_impl(test_case, **params)
def test_reduce_lr_on_plateau(test_case):
    """Compare ReduceLROnPlateau against torch across its full option grid."""
    arg_dict = OrderedDict(
        [
            ("mode", ["min", "max"]),
            ("factor", [0.1, 0.3]),
            ("patience", [2, 5]),
            ("threshold", [1e-3, 1e-5]),
            ("threshold_mode", ["rel", "abs"]),
            ("cooldown", [0, 1]),
            ("min_lr", [0, 1e-3]),
            ("eps", [1e-5, 1e-8]),
        ]
    )
    for params in GenArgDict(arg_dict):
        compare_with_torch_reduce_lr(test_case, **params)
def test_sgd(test_case):
    """Compare SGD against the numpy reference, including an optimizer-state reload mid-run."""
    arg_dict = OrderedDict(
        [
            ("device", ["cpu", "cuda"]),
            ("x_shape", [(10,)]),
            ("momentum", [0.0, 0.9]),
            ("weight_decay", [0.0, 0.9]),
            ("learning_rate", [1, 0.1]),
            ("train_iters", [10]),
            # save and load optim state
            ("reload_state_step", [5]),
            ("save_load_by_pickle", [False, True]),
        ]
    )
    for params in GenArgDict(arg_dict):
        compare_with_numpy_sgd(test_case, **params)
def test_one_embedding_adam(test_case):
    """Compare one-embedding Adam against the numpy reference across hyper-parameters."""
    arg_dict = OrderedDict(
        [
            ("weight_decay", [0, 0.1]),
            ("scale", [1, 0.1]),
            ("learning_rate", [1, 1.5]),
            ("train_iters", [10]),
            ("do_bias_correction", [True, False]),
            ("beta1", [0.9, 0.8]),
            ("beta2", [0.9, 0.8]),
        ]
    )
    for params in GenArgDict(arg_dict):
        compare_with_numpy_adam(test_case, **params)
def test_ftrl(test_case):
    """Compare FTRL against the numpy reference across its hyper-parameter grid."""
    arg_dict = OrderedDict(
        [
            # TODO(zzk): Currently Only support weight_decay = 0.0.
            ("weight_decay", [0.0]),
            ("lr_power", [-0.2, -0.05]),
            ("lambda1", [0.1]),
            ("lambda2", [0.00]),
            ("beta", [1.0]),
            ("scale", [1, 0.1]),
            ("learning_rate", [0.3, 1.5]),
            ("train_iters", [10]),
        ]
    )
    for params in GenArgDict(arg_dict):
        compare_with_numpy_ftrl(test_case, **params)
def test_coin_flip_consistent(test_case):
    """Run consistent CoinFlip over all placements/SBPs, skipping cuda placements."""
    arg_dict = OrderedDict(
        [
            ("batch_size", [8, 64]),
            ("random_seed", [None, 1, -1]),
            ("probability", [0.0, 0.5, 1.0]),
        ]
    )
    for args in GenArgDict(arg_dict):
        for placement in all_placement():
            # TODO: CoinFlip support cuda kernel
            if placement.type == "cuda":
                continue
            for sbp in all_sbp(placement, max_dim=1, except_partial_sum=True):
                _test_consistent_coin_flip(
                    test_case, **args, placement=placement, sbp=sbp
                )
def test_coin_flip_graph(test_case):
    """Run graph-mode CoinFlip over CPU placements (1d and 2d) and valid SBPs."""
    arg_dict = OrderedDict(
        [
            ("batch_size", [8]),
            ("random_seed", [None, 1, -1]),
            ("probability", [0.0, 0.5, 1.0]),
        ]
    )
    arg_dict["placement"] = [
        # 1d
        flow.placement("cpu", ranks=[0, 1]),
        # TODO: CoinFlip support cuda kernel
        # flow.placement("cuda", ranks=[0, 1]),
        # 2d
        flow.placement("cpu", ranks=[[0, 1]]),
        # TODO: CoinFlip support cuda kernel
        # flow.placement("cuda", ranks=[[0, 1],]),
    ]
    for args in GenArgDict(arg_dict):
        placement = args["placement"]
        for sbp in all_sbp(placement, max_dim=1, except_partial_sum=True):
            _test_graph_coin_flip(test_case, **args, sbp=sbp)
def test_unique_key_value(test_case):
    """Run the unique key/value test with and without table ids, over table counts."""
    arg_dict = OrderedDict(
        [
            ("has_table_id", [True, False]),
            ("num_tables", [13, 26, 1]),
        ]
    )
    for params in GenArgDict(arg_dict):
        _test_unique_key_value(test_case, **params)
def test_id_shuffle(test_case):
    """Run id shuffle with and without table ids, over table counts."""
    arg_dict = OrderedDict(
        [
            ("has_table_id", [True, False]),
            ("num_tables", [1, 26]),
        ]
    )
    for params in GenArgDict(arg_dict):
        _test_id_shuffle(test_case, **params)