def _test_0d_rand(test_case, device, shape):
    y1 = flow.rand(*shape, device=flow.device(device))
    y2 = flow.rand(*shape, device=flow.device(device))
    # For the 0-d case both tensors are shape [] and therefore compare equal.
    test_case.assertTrue(
        np.allclose(y1.numpy(), y2.numpy(), atol=1e-4, rtol=1e-4)
    )
    test_case.assertTrue(shape == y1.shape)
def _test_different_dtype(test_case, device, shape):
    y1 = flow.rand(*shape, dtype=flow.float32, device=flow.device(device))
    y2 = flow.rand(*shape, dtype=flow.float64, device=flow.device(device))
    test_case.assertTrue(not np.array_equal(y1.numpy(), y2.numpy()))
    test_case.assertTrue(shape == y1.shape)

    with test_case.assertRaises(
        oneflow._oneflow_internal.exception.UnimplementedException
    ):
        flow.rand(*shape, dtype=flow.int32, device=flow.device(device))
def _test_with_generator(test_case, device, shape):
    gen = flow.Generator()
    gen.manual_seed(0)
    y1 = flow.rand(
        *shape, dtype=flow.float32, device=flow.device(device), generator=gen
    )
    gen.manual_seed(0)
    y2 = flow.rand(
        *shape, dtype=flow.float32, device=flow.device(device), generator=gen
    )
    test_case.assertTrue(np.allclose(y1.numpy(), y2.numpy(), atol=1e-4, rtol=1e-4))
def _test_clip_grad_norm_consistent_impl(
    test_case, shape, sbp, placement, max_norm, norm_type
):
    of_input = flow.rand(
        *shape, dtype=flow.float32, sbp=sbp, placement=placement, requires_grad=True
    )
    np_input = of_input.to_global(sbp=flow.sbp.broadcast).to_local().numpy()
    m = flow.nn.ReLU()
    of_out = m(of_input)
    of_out = of_out.sum()
    of_out.backward()
    of_total_norm = flow.nn.utils.clip_grad_norm_(
        of_input, max_norm, norm_type
    ).to_local()
    np_total_norm, np_grad = _clip_grad_norm_np(np_input, max_norm, norm_type)
    test_case.assertTrue(
        np.allclose(of_total_norm.numpy(), np_total_norm, 1e-4, 1e-4, equal_nan=True)
    )
    test_case.assertTrue(
        np.allclose(
            of_input.grad.to_global(sbp=flow.sbp.broadcast).to_local().numpy(),
            np_grad,
            1e-4,
            1e-4,
            equal_nan=True,
        )
    )
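# The test above relies on a NumPy reference helper `_clip_grad_norm_np` defined
# elsewhere in the original test file. The function below is only a minimal sketch
# of what such a reference could look like for this particular test (the gradient
# of ReLU(x).sum() w.r.t. x is the 0/1 mask of x > 0); it is an assumption for
# illustration, not the original helper, and only covers p-norms and the inf norm.
def _clip_grad_norm_np_sketch(np_input, max_norm, norm_type):
    # Gradient of ReLU(x).sum() w.r.t. x: 1 where x > 0, else 0.
    np_grad = (np_input > 0).astype(np_input.dtype)
    # Total norm over the (single) gradient tensor.
    if norm_type == float("inf"):
        total_norm = np.abs(np_grad).max()
    else:
        total_norm = (np.abs(np_grad) ** norm_type).sum() ** (1.0 / norm_type)
    # Scale the gradient down only if its norm exceeds max_norm.
    clip_coef = max_norm / (total_norm + 1e-6)
    if clip_coef < 1:
        np_grad = np_grad * clip_coef
    return total_norm, np_grad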
def _test_rnn_utils_pad_sequence(test_case, device):
    input_size = random.randint(10, 200)
    max_seq_len = random.randint(20, 500)
    batch_size = random.randint(20, 500)
    lengths = []
    lengths.append(max_seq_len)
    for i in range(batch_size - 1):
        lengths.append(random.randint(1, max_seq_len))
    lengths.sort(reverse=True)
    sequences = []
    for i in range(batch_size):
        sequences.append(flow.rand(lengths[i], input_size).to(device))
    flow_res = flow_rnn_utils.pad_sequence(sequences)
    torch_inputs = [torch.tensor(ft.numpy(), device=device) for ft in sequences]
    torch_res = torch_rnn_utils.pad_sequence(torch_inputs)
    test_case.assertTrue(
        np.allclose(
            torch_res.cpu().detach().numpy(),
            flow_res.cpu().detach().numpy(),
            atol=1e-8,
        )
    )
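# A minimal, hedged illustration of the pad_sequence semantics assumed by the test
# above (mirroring torch.nn.utils.rnn.pad_sequence with the default batch_first=False).
# The concrete shapes are a toy example, not values used in the test.
#
#   a = flow.rand(3, 8)          # sequence of length 3, feature size 8
#   b = flow.rand(2, 8)          # sequence of length 2, feature size 8
#   padded = flow_rnn_utils.pad_sequence([a, b])
#   # b is zero-padded to length 3; result shape is (max_len, batch, features).
#   assert padded.shape == flow.Size([3, 2, 8])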
def test_tensor_is_consistent(self):
    with self.assertRaises(Exception) as context:
        data = flow.rand(2, dtype=flow.float32)
        print(data.is_consistent())
    self.assertTrue(
        ".is_consistent has been removed, please use .is_global instead"
        in str(context.exception)
    )
def test_to_torch_cpu_with_0_size_data(test_case):
    flow_t = flow.rand(5, 3, 0)
    torch_t = flow.utils.to_torch(flow_t)
    test_case.assertTrue(
        np.allclose(flow_t.numpy(), torch_t.numpy(), rtol=0.001, atol=0.001)
    )
    test_case.assertEqual(flow_t.numpy().dtype, torch_t.numpy().dtype)
def test_tensor_to_consistent(self):
    with self.assertRaises(Exception) as context:
        data = flow.rand(2, dtype=flow.float32)
        placement = flow.env.all_device_placement("cuda")
        sbp = flow.sbp.split(0)
        global_data = data.to_consistent(placement=placement, sbp=sbp)
    self.assertTrue(
        ".to_consistent has been removed, please use .to_global instead"
        in str(context.exception)
    )
def test_add_with_alpha(test_case):
    try:
        flow._oneflow_internal.global_view.set_sync_timeout(10)
        data = flow.rand(2, dtype=flow.float32)
        placement = flow.env.all_device_placement("cuda")
        sbp = flow.sbp.split(0)
        consistent_data = data.to_consistent(placement=placement, sbp=sbp)
        # Only rank 0 operates on the global tensor, so the ranks diverge and
        # the sync timeout error below is expected.
        if flow.env.get_rank() == 0:
            print(data.mean())
            print(consistent_data.mean())
    except Exception as e:
        err_msg = "Maybe executing different code in different ranks, please check if the code is branched and operates on the global tensor"
        assert err_msg in str(e)
def test_global_branch_error_with_local_to_global(test_case):
    try:
        os.environ["ONEFLOW_TIMEOUT_SECONDS"] = "2"
        data = flow.rand(2, dtype=flow.float32)
        placement = flow.env.all_device_placement("cuda")
        sbp = flow.sbp.split(0)
        if flow.env.get_rank() == 0:
            global_data = data.to_global(placement=placement, sbp=sbp)
        else:
            time.sleep(2)
    except Exception as e:
        err_msg = "Maybe executing different code in different ranks, please check if the code is branched and operates on the global tensor"
        assert err_msg in str(e)
    finally:
        os.environ["ONEFLOW_TIMEOUT_SECONDS"] = "300"
def drop_path(x, drop_prob: float = 0.5, training: bool = False):
    """Drop paths (Stochastic Depth) per sample, applied in the main path of residual blocks.

    This is the same as the DropConnect impl I created for EfficientNet, etc. networks;
    however, the original name is misleading, as 'Drop Connect' is a different form of
    dropout from a separate paper.
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956
    I've opted to change the layer and argument names to 'drop path' rather than mix
    DropConnect as a layer name and use 'survival rate' as the argument.
    """
    if drop_prob == 0.0 or not training:
        return x
    keep_prob = 1 - drop_prob
    # Shape (N, 1, 1, ..., 1) works with tensors of any rank, not just 2D ConvNet inputs.
    shape = (x.shape[0],) + (1,) * (x.ndim - 1)
    random_tensor = keep_prob + flow.rand(*shape, dtype=x.dtype, device=x.device)
    random_tensor = random_tensor.floor()  # binarize: 1 with prob keep_prob, else 0
    output = x.div(keep_prob) * random_tensor
    return output
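# A minimal, hypothetical sketch of how drop_path is typically used inside a residual
# block. The ResidualBlockSketch module and its single conv layer are illustrative
# assumptions, not part of the code above: the residual branch is randomly dropped per
# sample during training, while the identity path is always kept.
class ResidualBlockSketch(flow.nn.Module):
    def __init__(self, channels, drop_prob=0.1):
        super().__init__()
        self.conv = flow.nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.drop_prob = drop_prob

    def forward(self, x):
        # Identity path plus a stochastically dropped residual branch.
        return x + drop_path(self.conv(x), self.drop_prob, self.training)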
def test_to_torch_cpu(test_case):
    flow_t = flow.rand(5, 3, 3)
    numpy_from_flow = flow_t.numpy()
    torch_t = flow.utils.to_torch(flow_t)
    # to_torch should share memory with the oneflow tensor (and its numpy view).
    test_case.assertEqual(
        torch_t.data_ptr(), numpy_from_flow.__array_interface__["data"][0]
    )
    numpy_from_flow[0][0] = [1, 2, 3]
    test_case.assertTrue(
        np.allclose(torch_t.numpy(), numpy_from_flow, rtol=0.001, atol=0.001)
    )
    test_case.assertTrue(
        np.allclose(flow_t.numpy(), torch_t.numpy(), rtol=0.001, atol=0.001)
    )
    test_case.assertEqual(flow_t.numpy().dtype, torch_t.numpy().dtype)
def _test_rnn_pack_sequence(test_case, device):
    l = ["tanh", "relu"]
    input_size = random.randint(10, 1000)
    hidden_size = random.randint(10, 1000)
    num_layers = random.randint(1, 6)
    nonlinearity = l[0 if num_layers <= 3 else 1]
    grad_tol = 1e-4
    if nonlinearity == "relu":
        grad_tol = 100
    bias = random.randint(-10, 10) <= 0
    batch_first = False
    dropout = 0
    bidirectional = random.randint(-10, 10) <= 0
    rnn_torch = torch.nn.RNN(
        input_size=input_size,
        hidden_size=hidden_size,
        num_layers=num_layers,
        nonlinearity=nonlinearity,
        bias=bias,
        batch_first=batch_first,
        dropout=dropout,
        bidirectional=bidirectional,
    )
    rnn_flow = flow.nn.RNN(
        input_size=input_size,
        hidden_size=hidden_size,
        num_layers=num_layers,
        nonlinearity=nonlinearity,
        bias=bias,
        batch_first=batch_first,
        dropout=dropout,
        bidirectional=bidirectional,
    )
    # Copy the torch weights into the oneflow module so both run with identical parameters.
    torch_state_dict = rnn_torch.state_dict()
    new_dict = {}
    for k, v in torch_state_dict.items():
        new_dict[k] = v.detach().numpy()
    rnn_flow.load_state_dict(new_dict)
    rnn_flow = rnn_flow.to(device)
    rnn_torch = rnn_torch.to(device)
    max_seq_len = random.randint(10, 50)
    batch_size = random.randint(10, 50)
    lengths = []
    lengths.append(max_seq_len)
    for i in range(batch_size - 1):
        lengths.append(random.randint(1, max_seq_len))
    lengths.sort(reverse=True)
    sequences = []
    for i in range(batch_size):
        sequences.append(flow.rand(lengths[i], input_size).to(device))
    x_flow = flow_rnn_utils.pack_sequence(sequences)
    torch_inputs = [torch.tensor(ft.numpy(), device=device) for ft in sequences]
    x_torch = torch_rnn_utils.pack_sequence(torch_inputs)
    out_torch, hid_torch = rnn_torch(x_torch)
    out_flow, hid_flow = rnn_flow(x_flow)
    z_torch = out_torch.data.sum()
    z_torch.backward()
    z_flow = out_flow.data.sum()
    z_flow.backward()
    test_case.assertTrue(
        np.allclose(
            out_torch.data.cpu().detach().numpy(),
            out_flow.data.cpu().detach().numpy(),
            atol=1e-5,
        )
    )
    test_case.assertTrue(
        np.allclose(
            hid_torch.cpu().detach().numpy(),
            hid_flow.cpu().detach().numpy(),
            atol=1e-5,
        )
    )
    all_weights = rnn_torch.all_weights
    torch_params = []
    for ls in all_weights:
        for l in ls:
            torch_params.append(l)
    all_weights = rnn_flow.all_weights
    flow_params = []
    for ls in all_weights:
        for l in ls:
            flow_params.append(l)
    for i in range(len(flow_params)):
        torch_np = torch_params[i].grad.cpu().numpy()
        flow_np = flow_params[i].grad.cpu().numpy()
        test_case.assertTrue(np.allclose(torch_np, flow_np, atol=grad_tol))
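# A hedged sketch of the pack_sequence semantics used above (mirroring
# torch.nn.utils.rnn.pack_sequence; the toy numbers are assumptions, not test values):
# sequences of lengths 3 and 2 are interleaved time-step by time-step into one data
# tensor, and batch_sizes records how many sequences are still active at each step.
#
#   a = flow.rand(3, 8)
#   b = flow.rand(2, 8)
#   packed = flow_rnn_utils.pack_sequence([a, b])
#   assert packed.data.shape == flow.Size([5, 8])      # 3 + 2 packed time steps
#   assert packed.batch_sizes.tolist() == [2, 2, 1]    # active sequences per step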
def _test_lstm_pack_sequence(test_case, device):
    input_size = random.randint(10, 1000)
    hidden_size = random.randint(12, 1000)
    num_layers = random.randint(1, 6)
    bias = random.randint(-10, 10) <= 0
    batch_first = False
    dropout = 0
    bidirectional = random.randint(-10, 10) <= 0
    proj_size = random.randint(0, hidden_size - 1)
    lstm_torch = torch.nn.LSTM(
        input_size=input_size,
        hidden_size=hidden_size,
        num_layers=num_layers,
        bias=bias,
        batch_first=batch_first,
        dropout=dropout,
        bidirectional=bidirectional,
        proj_size=proj_size,
    )
    lstm_flow = flow.nn.LSTM(
        input_size=input_size,
        hidden_size=hidden_size,
        num_layers=num_layers,
        bias=bias,
        batch_first=batch_first,
        dropout=dropout,
        bidirectional=bidirectional,
        proj_size=proj_size,
    )
    torch_state_dict = lstm_torch.state_dict()
    new_dict = {}
    for k, v in torch_state_dict.items():
        new_dict[k] = v.detach().numpy()
    lstm_flow.load_state_dict(new_dict)
    lstm_flow = lstm_flow.to(device)
    lstm_torch = lstm_torch.to(device)
    max_seq_len = random.randint(10, 50)
    batch_size = random.randint(10, 50)
    lengths = []
    lengths.append(max_seq_len)
    for i in range(batch_size - 1):
        lengths.append(random.randint(1, max_seq_len))
    lengths.sort(reverse=True)
    sequences = []
    for i in range(batch_size):
        sequences.append(flow.rand(lengths[i], input_size).to(device))
    x_flow = flow_rnn_utils.pack_sequence(sequences)
    torch_inputs = [torch.tensor(ft.numpy(), device=device) for ft in sequences]
    x_torch = torch_rnn_utils.pack_sequence(torch_inputs)
    out_torch, hid_torch = lstm_torch(x_torch)
    out_flow, hid_flow = lstm_flow(x_flow)
    z_torch = out_torch.data.sum()
    z_torch.backward()
    z_flow = out_flow.data.sum()
    z_flow.backward()
    test_case.assertTrue(
        np.allclose(
            out_torch.data.cpu().detach().numpy(),
            out_flow.data.cpu().detach().numpy(),
            atol=1e-5,
        )
    )
    test_case.assertTrue(
        np.allclose(
            hid_torch[0].cpu().detach().numpy(),
            hid_flow[0].cpu().detach().numpy(),
            atol=1e-5,
        )
    )
    test_case.assertTrue(
        np.allclose(
            hid_torch[1].cpu().detach().numpy(),
            hid_flow[1].cpu().detach().numpy(),
            atol=1e-5,
        )
    )
    all_weights = lstm_torch.all_weights
    torch_params = []
    for ls in all_weights:
        for l in ls:
            torch_params.append(l)
    all_weights = lstm_flow.all_weights
    flow_params = []
    for ls in all_weights:
        for l in ls:
            flow_params.append(l)
    for i in range(len(flow_params)):
        torch_np = torch_params[i].grad.cpu().numpy()
        flow_np = flow_params[i].grad.cpu().numpy()
        test_case.assertTrue(np.allclose(torch_np, flow_np, atol=1e-4))
def build(self):
    x = flow.rand(*shape, placement=placement, sbp=sbp)
    return x
def _test_consistent_rand(test_case, shape, placement, sbp):
    x = flow.rand(*shape, placement=placement, sbp=sbp)
    test_case.assertEqual(x.shape, flow.Size(shape))
    test_case.assertEqual(x.sbp, sbp)
    test_case.assertEqual(x.placement, placement)
def test_infos_of_nodes(test_case):
    alexnet_module = alexnet()
    alexnet_graph = Graph(alexnet_module)
    if not alexnet_graph._is_compiled:
        alexnet_graph._compile(flow.rand(1, 3, 224, 224))
    graph_str = repr(alexnet_graph)

    size_where = 2
    if "cuda" in graph_str:
        size_where = 3

    p_size = re.compile(r"size=\(.*?\)", re.S)
    p_type = re.compile(r"dtype=.*?\)", re.S)
    types = ["INPUT", "PARAMETER", "BUFFER", "OUTPUT"]
    num_nodes = {}
    for t in types:
        data = re.finditer(t + ":.*", graph_str)
        cnt = 0
        for i in data:
            cnt += 1
            attrs = i.group().split(":")
            size_strs = re.findall(p_size, attrs[size_where])
            type_strs = re.findall(p_type, attrs[size_where])
            test_case.assertEqual(size_strs != [], True)
            test_case.assertEqual(type_strs != [], True)
            size_attr = size_strs[0].replace("size=", "")
            type_attr = type_strs[0].replace("dtype=", "").replace(")", "")
            if size_attr[-2] == ",":
                size_attr = size_attr.replace(",", "")
            if type_attr[-1] == ",":
                type_attr = type_attr.replace(",", "")
            test_case.assertEqual(type_attr, "oneflow.float32")
            data_size = tuple(map(int, size_attr[1:-1].split(", ")))
            if cnt == 1 and t == "PARAMETER":
                test_case.assertEqual(data_size, (64, 3, 11, 11))
            elif cnt == 15 and t == "PARAMETER":
                test_case.assertEqual(data_size, (1000, 4096))
        num_nodes[t] = cnt

    test_case.assertEqual(num_nodes["INPUT"] != 0, True)
    test_case.assertEqual(num_nodes["BUFFER"], 0)
    test_case.assertEqual(num_nodes["PARAMETER"], 16)
    test_case.assertEqual(num_nodes["OUTPUT"] != 0, True)

    # Parse the input shape back out of the graph repr, then get the graph proto.
    # If the graph has not been _compile()d, _graph_proto will be None, so compile
    # with an input of the parsed shape before reading it.
    graph_input = re.search(r"INPUT:.*", graph_str).group().split(":")
    shape_input = tuple(
        map(
            int,
            re.findall(p_size, graph_input[size_where])[0]
            .replace("size=", "")[1:-1]
            .split(", "),
        )
    )
    if not alexnet_graph._is_compiled:
        alexnet_graph._compile(flow.rand(shape_input))
    graph_proto = alexnet_graph._graph_proto

    nodes = {}
    for op in graph_proto.net.op:
        nodes[op.name] = op

    op_names = []
    op_attrs = []
    for node_name in nodes:
        node = nodes[node_name]
        if is_user_op(node):
            op_name = node.user_conf.op_type_name
            op_attr = parse_attr(node.user_conf.attr)
            op_names.append(op_name)
            op_attrs.append(op_attr)

    test_case.assertEqual(op_names[0], "conv2d")
    test_case.assertEqual(op_names[1], "bias_add")
    test_case.assertEqual(op_names[2], "relu")

    kernel_size = op_attrs[0].get("kernel_size", None)
    strides = op_attrs[0].get("strides", None)
    padding_before = op_attrs[0].get("padding_before", None)
    test_case.assertEqual(kernel_size, (11, 11))
    test_case.assertEqual(strides, (4, 4))
    test_case.assertEqual(padding_before, (2, 2))
def test_max_with_diff_size(test_case):
    x = flow.rand(1, 1, 4, requires_grad=True)
    y = flow.rand(1, 4, requires_grad=True)
    x = random_tensor(3, 1, 1, 4)
    y = random_tensor(2, 1, 4)
    return torch.max(x, y)
def train(self):
    # Learning rate cache for decaying.
    g_lr = self.g_lr
    d_lr = self.d_lr
    c_lr = self.c_lr
    start_iters = 0
    if self.resume_iters:
        pass

    norm = Normalizer()
    data_iter = iter(self.data_loader)
    print("Start training......")
    start_time = datetime.now()

    for i in range(start_iters, self.num_iters):
        # Preprocess input data: fetch real audio frames and labels.
        try:
            x_real, speaker_idx_org, label_org = next(data_iter)
        except:
            data_iter = iter(self.data_loader)
            x_real, speaker_idx_org, label_org = next(data_iter)

        # Generate target domain labels randomly.
        rand_idx = flow.randperm(label_org.size(0))
        label_trg = label_org[rand_idx]
        speaker_idx_trg = speaker_idx_org[rand_idx]

        x_real = x_real.to(self.device)
        # Original domain one-hot labels.
        label_org = label_org.to(self.device)
        # Target domain one-hot labels.
        label_trg = label_trg.to(self.device)
        speaker_idx_org = speaker_idx_org.to(self.device)
        speaker_idx_trg = speaker_idx_trg.to(self.device)

        # Train the discriminator.
        # Compute loss with real audio frames.
        CELoss = nn.CrossEntropyLoss()
        cls_real = self.C(x_real)
        cls_loss_real = CELoss(input=cls_real, target=speaker_idx_org)
        self.reset_grad()
        cls_loss_real.backward()
        self.c_optimizer.step()

        # Logging.
        loss = {}
        loss["C/C_loss"] = cls_loss_real.item()

        out_r = self.D(x_real, label_org)
        # Compute loss with fake audio frames.
        x_fake = self.G(x_real, label_trg)
        out_f = self.D(x_fake.detach(), label_trg)
        d_loss_t = nn.BCEWithLogitsLoss()(
            input=out_f, target=flow.zeros_like(out_f).float()
        ) + nn.BCEWithLogitsLoss()(input=out_r, target=flow.ones_like(out_r).float())

        out_cls = self.C(x_fake)
        d_loss_cls = CELoss(input=out_cls, target=speaker_idx_trg)

        # Compute loss for gradient penalty.
        alpha = flow.rand(x_real.size(0), 1, 1, 1).to(self.device)
        x_hat = (alpha * x_real + (1 - alpha) * x_fake).detach().requires_grad_(True)
        out_src = self.D(x_hat, label_trg)

        # TODO: Second-order derivation is not currently supported in oneflow,
        # so the gradient penalty cannot be used for now.
        if self.use_gradient_penalty:
            d_loss_gp = self.gradient_penalty(out_src, x_hat)
            d_loss = d_loss_t + self.lambda_cls * d_loss_cls + 5 * d_loss_gp
        else:
            d_loss = d_loss_t + self.lambda_cls * d_loss_cls

        self.reset_grad()
        d_loss.backward()
        self.d_optimizer.step()
        loss["D/D_loss"] = d_loss.item()

        # Train the generator.
        if (i + 1) % self.n_critic == 0:
            # Original-to-target domain.
            x_fake = self.G(x_real, label_trg)
            g_out_src = self.D(x_fake, label_trg)
            g_loss_fake = nn.BCEWithLogitsLoss()(
                input=g_out_src, target=flow.ones_like(g_out_src).float()
            )

            out_cls = self.C(x_real)
            g_loss_cls = CELoss(input=out_cls, target=speaker_idx_org)

            # Target-to-original domain.
            x_reconst = self.G(x_fake, label_org)
            g_loss_rec = nn.L1Loss()(x_reconst, x_real)

            # Original-to-original domain (identity).
            x_fake_iden = self.G(x_real, label_org)
            id_loss = nn.L1Loss()(x_fake_iden, x_real)

            # Backward and optimize.
            g_loss = (
                g_loss_fake
                + self.lambda_cycle * g_loss_rec
                + self.lambda_cls * g_loss_cls
                + self.lambda_identity * id_loss
            )
            self.reset_grad()
            g_loss.backward()
            self.g_optimizer.step()

            # Logging.
            loss["G/loss_fake"] = g_loss_fake.item()
            loss["G/loss_rec"] = g_loss_rec.item()
            loss["G/loss_cls"] = g_loss_cls.item()
            loss["G/loss_id"] = id_loss.item()
            loss["G/g_loss"] = g_loss.item()

        # Miscellaneous: print out training information.
        if (i + 1) % self.log_step == 0:
            et = datetime.now() - start_time
            et = str(et)[:-7]
            log = "Elapsed [{}], Iteration [{}/{}]".format(et, i + 1, self.num_iters)
            for tag, value in loss.items():
                log += ", {}: {:.4f}".format(tag, value)
            print(log)

        # Translate fixed test samples for debugging.
        if (i + 1) % self.sample_step == 0:
            with flow.no_grad():
                d, speaker = TestSet(self.test_dir).test_data()
                target = random.choice([x for x in speakers if x != speaker])
                label_t = self.spk_enc.transform([target])[0]
                label_t = np.asarray([label_t])
                for filename, content in d.items():
                    f0 = content["f0"]
                    ap = content["ap"]
                    sp_norm_pad = self.pad_coded_sp(content["coded_sp_norm"])
                    convert_result = []
                    for start_idx in range(
                        0, sp_norm_pad.shape[1] - FRAMES + 1, FRAMES
                    ):
                        one_seg = sp_norm_pad[:, start_idx:start_idx + FRAMES]
                        one_seg = flow.Tensor(one_seg).to(self.device)
                        one_seg = one_seg.view(1, 1, one_seg.size(0), one_seg.size(1))
                        l = flow.Tensor(label_t)
                        one_seg = one_seg.to(self.device)
                        l = l.to(self.device)
                        one_set_return = self.G(one_seg, l).detach().cpu().numpy()
                        one_set_return = np.squeeze(one_set_return)
                        one_set_return = norm.backward_process(one_set_return, target)
                        convert_result.append(one_set_return)
                    convert_con = np.concatenate(convert_result, axis=1)
                    convert_con = convert_con[:, 0:content["coded_sp_norm"].shape[1]]
                    contigu = np.ascontiguousarray(convert_con.T, dtype=np.float64)
                    decoded_sp = decode_spectral_envelope(
                        contigu, SAMPLE_RATE, fft_size=FFTSIZE
                    )
                    f0_converted = norm.pitch_conversion(f0, speaker, target)
                    wav = synthesize(f0_converted, decoded_sp, ap, SAMPLE_RATE)
                    name = f"{speaker}-{target}_iter{i+1}_{filename}"
                    path = os.path.join(self.sample_dir, name)
                    print(f"[save]:{path}")
                    sf.write(path, wav, SAMPLE_RATE)

        # Save model checkpoints.
        if (i + 1) % self.model_save_step == 0:
            G_path = os.path.join(self.model_save_dir, "{}-G".format(i + 1))
            D_path = os.path.join(self.model_save_dir, "{}-D".format(i + 1))
            C_path = os.path.join(self.model_save_dir, "{}-C".format(i + 1))
            flow.save(self.G.state_dict(), G_path)
            flow.save(self.D.state_dict(), D_path)
            flow.save(self.C.state_dict(), C_path)
            print("Saved model checkpoints into {}...".format(self.model_save_dir))

        # Decay learning rates.
        if (i + 1) % self.lr_update_step == 0 and (i + 1) > (
            self.num_iters - self.num_iters_decay
        ):
            g_lr -= self.g_lr / float(self.num_iters_decay)
            d_lr -= self.d_lr / float(self.num_iters_decay)
            c_lr -= self.c_lr / float(self.num_iters_decay)
            self.update_lr(g_lr, d_lr, c_lr)
            print("Decayed learning rates, g_lr: {}, d_lr: {}.".format(g_lr, d_lr))
def _test_rand(test_case, device, shape):
    y1 = flow.rand(*shape, device=flow.device(device))
    y2 = flow.rand(*shape, device=flow.device(device))
    test_case.assertTrue(not np.array_equal(y1.numpy(), y2.numpy()))
    test_case.assertTrue(shape == y1.shape)
def _test_backward(test_case, device, shape):
    x = flow.rand(*shape, device=flow.device(device), requires_grad=True)
    y = x.sum()
    y.backward()
    test_case.assertTrue(np.array_equal(np.ones(shape), x.grad.numpy()))
def test_consistent_naive(test_case):
    placement = flow.placement("cpu", {0: [0]})
    sbp = (flow.sbp.broadcast,)
    x = flow.rand(16, 16, placement=placement, sbp=sbp)
    test_case.assertEqual(x.sbp, sbp)
    test_case.assertEqual(x.placement, placement)