def test_div(test_case):
    """Compare ``flow.div`` with ``np.divide`` for several operand combinations.

    Exercised pairs, in order: tensor / tensor of equal shape, int / tensor,
    tensor / int, tensor / ``(1, 1)`` tensor, one-element tensor / tensor and
    tensor / one-element tensor.

    Args:
        test_case: Object providing ``assertTrue`` (a ``unittest.TestCase``).
    """

    def as_numpy(operand):
        # Plain Python scalars go to NumPy untouched; tensors are converted.
        return operand.numpy() if hasattr(operand, "numpy") else operand

    def check(lhs, rhs):
        actual = flow.div(lhs, rhs)
        expected = np.divide(as_numpy(lhs), as_numpy(rhs))
        test_case.assertTrue(np.allclose(actual.numpy(), expected, 1e-4, 1e-4))

    # Same-shape tensors.
    check(flow.Tensor(np.random.randn(2, 3)), flow.Tensor(np.random.randn(2, 3)))

    # Scalar on either side.
    check(5, flow.Tensor(np.random.randn(2, 3)))
    check(flow.Tensor(np.random.randn(2, 3)), 5)

    # Broadcasting against a (1, 1) tensor and against one-element tensors.
    check(flow.Tensor(np.random.randn(2, 3)), flow.Tensor(np.random.randn(1, 1)))
    check(flow.Tensor(np.array([5])), flow.Tensor(np.random.randn(2, 3)))
    check(flow.Tensor(np.random.randn(2, 3)), flow.Tensor(np.array([5])))
def __rtruediv__(self, other):
    """Reflected true division: evaluate ``other / self`` through ``flow.div``."""
    numerator, denominator = other, self
    return flow.div(numerator, denominator)
def _test_div_impl(test_case, shape, device):
    """Check ``flow.div`` forward results and the gradient w.r.t. the dividend.

    Forward results are compared with ``np.divide`` for tensor / tensor,
    int / tensor, tensor / int and two broadcasting cases. Finally the
    gradient of ``sum(x / y)`` with respect to ``x`` is checked for ``y = [5.0]``.

    Args:
        test_case: Object providing ``assertTrue`` (a ``unittest.TestCase``).
        shape: Shape of the randomly generated operands.
        device: Device name handed to ``flow.device``.
    """

    def new_tensor(values, **extra):
        # Every tensor in this test is float32 and lives on the requested device.
        return flow.tensor(
            values, dtype=flow.float32, device=flow.device(device), **extra
        )

    def random_tensor():
        return new_tensor(np.random.randn(*shape))

    def as_numpy(operand):
        # Plain Python scalars go to NumPy untouched; tensors are converted.
        return operand.numpy() if hasattr(operand, "numpy") else operand

    def check(lhs, rhs):
        result = flow.div(lhs, rhs)
        expected = np.divide(as_numpy(lhs), as_numpy(rhs))
        test_case.assertTrue(np.allclose(result.numpy(), expected, 0.0001, 0.0001))
        return result

    # Same-shape tensors.
    check(random_tensor(), random_tensor())

    # Scalar on either side.
    check(5, random_tensor())
    check(random_tensor(), 5)

    # Broadcasting against a (1, 1) divisor and a one-element dividend.
    check(random_tensor(), new_tensor(np.random.randn(1, 1)))
    check(new_tensor(np.array([5.0])), random_tensor())

    # Forward + backward with a one-element divisor.
    x = new_tensor(np.random.randn(*shape), requires_grad=True)
    y = new_tensor(np.array([5.0]), requires_grad=True)
    quotient = check(x, y)
    quotient.sum().backward()

    # d/dx sum(x / y) = 1 / y = 1 / 5 = 0.2 for every element of x.
    expected_grad_x = np.full(shape, 0.2)
    test_case.assertTrue(
        np.allclose(x.grad.numpy(), expected_grad_x, 0.0001, 0.0001)
    )
def average_chkpt(datadir, start, end):
    """Average the parameters of consecutive epoch checkpoints and save the result.

    For every epoch index in ``[start, end]`` the ``frontend.pt``, ``encoder.pt``
    and ``decoder.pt`` states are loaded from ``<datadir>/model.epoch.<idx>.pt/``.
    Each component is summed parameter by parameter, divided by the number of
    checkpoints and saved under ``<datadir>/model.average.from<start>to<end>.pt/``.

    The parameter names of each component are taken from the first checkpoint;
    a later checkpoint lacking one of them raises ``KeyError``.

    Args:
        datadir: Directory that contains the ``model.epoch.<idx>.pt`` checkpoints.
        start: First epoch index (any value accepted by ``int()``).
        end: Last epoch index, inclusive (any value accepted by ``int()``).

    Raises:
        ValueError: If the epoch range is empty, or if a parameter other than
            a ``batch_norm.num_batches_tracked`` entry cannot be divided in place.
    """
    id_chkpt = [str(i) for i in range(int(start), int(end) + 1)]
    if not id_chkpt:
        raise ValueError(
            "Empty checkpoint range: start=%s is greater than end=%s" % (start, end)
        )
    print("Average these number %s models" % ",".join(id_chkpt))

    chkpts = ["model.epoch.%s.pt" % idx for idx in id_chkpt]
    num_models = len(chkpts)
    components = ("frontend", "encoder", "decoder")

    param_sums = {}  # component name -> {parameter name -> running sum}
    new_state = None
    for chkpt in chkpts:
        state = {
            component: flow.load(os.path.join(datadir, chkpt, component + ".pt"))
            for component in components
        }
        # The first checkpoint provides the container that receives the averages.
        if new_state is None:
            new_state = state

        # Accumulate every component. The previous `key != "model"` filter
        # matched none of the component names, so nothing was ever averaged.
        for component, params in state.items():
            if component not in param_sums:
                # NOTE: clone() is needed in case p is a shared parameter, and
                # it keeps the in-place sums from modifying the loaded state.
                param_sums[component] = {
                    name: tensor.clone() for name, tensor in params.items()
                }
                continue
            sums = param_sums[component]
            for name in sums:
                sums[name] += params[name]

    for component, sums in param_sums.items():
        averaged = {}
        for name, total in sums.items():
            try:
                total.div_(num_models)
            except Exception as err:
                if "batch_norm.num_batches_tracked" not in name:
                    print("Key: %s Tensor: %s" % (component, name))
                    raise ValueError(
                        "Cannot average parameter %r of %r" % (name, component)
                    ) from err
                # Fall back to an out-of-place division for the batch-norm
                # counter and cast the result back to an integer tensor.
                total = flow.div(total, num_models).long()
            averaged[name] = total
        new_state[component] = averaged

    out_dir = os.path.join(datadir, "model.average.from%sto%s.pt" % (start, end))
    for component in components:
        flow.save(new_state[component], os.path.join(out_dir, component + ".pt"))

    print("Save the average checkpoint as %s" % out_dir)
    print("Done!")