Beispiel #1
0
    def training_step(self, batch, optimizer_idx):
        """Build the GAN loss for the optimizer selected by ``optimizer_idx``.

        * ``optimizer_idx == 0`` -- generator step; ``batch`` is ``(z,)`` and
          the result is the tuple ``(g_loss, g_out)``.
        * ``optimizer_idx == 1`` -- discriminator step; ``batch`` is
          ``(z, images)`` and the result is ``d_loss``.

        Any other index falls through and returns ``None``.
        """
        bce_with_logits = flow.nn.sigmoid_cross_entropy_with_logits

        if optimizer_idx == 0:
            # Generator: only the generator is trainable, and the fake samples
            # are scored against all-ones labels.
            (noise,) = batch
            fake = self._generator(noise, trainable=True, const_init=True)
            fake_logits = self._discriminator(
                fake, trainable=False, const_init=True
            )
            gen_loss = bce_with_logits(
                flow.ones_like(fake_logits),
                fake_logits,
                name="Gloss_sigmoid_cross_entropy_with_logits",
            )
            return (gen_loss, fake)

        if optimizer_idx == 1:
            # Discriminator: fake samples are labelled 0, real images 1.
            noise, images = batch
            fake = self._generator(noise, trainable=False, const_init=True)
            fake_logits = self._discriminator(
                fake, trainable=True, const_init=True
            )
            fake_term = bce_with_logits(
                flow.zeros_like(fake_logits),
                fake_logits,
                name="Dloss_fake_sigmoid_cross_entropy_with_logits",
            )

            # Second discriminator call shares variables via reuse=True.
            real_logits = self._discriminator(
                images, trainable=True, reuse=True, const_init=True
            )
            real_term = bce_with_logits(
                flow.ones_like(real_logits),
                real_logits,
                name="Dloss_real_sigmoid_cross_entropy_with_logits",
            )
            return fake_term + real_term
Beispiel #2
0
    def forward(self, inputs, targets):
        """Compute the batch-hard triplet loss.

        For every anchor in the batch the farthest sample with the same label
        (hardest positive) and the closest sample with a different label
        (hardest negative) are selected and passed to ``self.ranking_loss``.

        Args:
            inputs (flow.Tensor): feature matrix with shape (batch_size, feat_dim).
            targets (flow.Tensor): ground truth labels with shape (batch_size).

        Returns:
            The value of ``self.ranking_loss(dist_an, dist_ap, y)`` with ``y``
            filled with ones.
        """
        n = inputs.size(0)
        # Keep every helper tensor on the same device as the features instead
        # of assuming CUDA, so the loss also works on CPU.
        device = inputs.device

        # Pairwise Euclidean distance: ||a||^2 + ||b||^2 - 2 * a.b
        dist = flow.pow(inputs, 2).sum(dim=1).expand(n, n)
        dist = dist + flow.transpose(dist, dim0=1, dim1=0)
        cross = -2 * flow.matmul(inputs, flow.transpose(inputs, dim0=1,
                                                        dim1=0))
        dist = flow.add(dist, cross)
        # Clamp before sqrt to avoid taking the root of tiny negative values.
        dist = flow.sqrt(flow.clamp(dist, min=1e-12))

        # mask[i][j] is set when samples i and j share a label.
        labels = targets.expand(n, n)
        mask = labels.eq(flow.transpose(labels, dim0=1, dim1=0))
        mask_rev = 1 - mask  # loop-invariant, computed once

        # Fill values used where the mask excludes an entry: 0 never wins a
        # max, and exp(100) never wins a min.
        y1 = flow.zeros((1, n), dtype=flow.float32).to(device)
        y2 = flow.Tensor(np.exp(100 * np.ones((1, n)))).to(device)

        # For each anchor, find the hardest positive and negative
        dist_ap, dist_an = [], []
        for i in range(n):
            row = [(i, i + 1, 1)]
            temp_dist = flow.slice(dist, row)
            temp_mask = flow.slice(mask, row)
            temp_mask_rev = flow.slice(mask_rev, row)
            dist_ap.append(temp_mask.where(temp_dist, y1).max().unsqueeze(0))
            dist_an.append(
                temp_mask_rev.where(temp_dist, y2).min().unsqueeze(0))
        dist_ap = flow.cat(dist_ap)
        dist_an = flow.cat(dist_an)

        # Compute ranking hinge loss
        y = flow.ones_like(dist_an)
        return self.ranking_loss(dist_an, dist_ap, y)
Beispiel #3
0
        def test_discriminator(
                z=flow.FixedTensorDef((self.batch_size, 100)),
                images=flow.FixedTensorDef((self.batch_size, 1, 28, 28)),
                label1=flow.FixedTensorDef((self.batch_size, 1)),
                label0=flow.FixedTensorDef((self.batch_size, 1)),
        ):
            """Build the discriminator loss and register it with ``flow.losses``.

            ``label1`` and ``label0`` are accepted but not read; the target
            labels are generated with ``ones_like`` / ``zeros_like``.
            Returns the summed fake + real loss.
            """
            bce_with_logits = flow.nn.sigmoid_cross_entropy_with_logits

            # Fake branch: generated samples should be classified as 0.
            fake = self.generator(z, trainable=False, const_init=True)
            fake_logits = self.discriminator(
                fake, trainable=True, const_init=True)
            fake_term = bce_with_logits(
                flow.zeros_like(fake_logits),
                fake_logits,
                name="Dloss_fake_sigmoid_cross_entropy_with_logits",
            )

            # Real branch: reuse the discriminator variables, target is 1.
            real_logits = self.discriminator(
                images, trainable=True, reuse=True, const_init=True)
            real_term = bce_with_logits(
                flow.ones_like(real_logits),
                real_logits,
                name="Dloss_real_sigmoid_cross_entropy_with_logits",
            )

            total = fake_term + real_term
            flow.losses.add_loss(total)
            return total
Beispiel #4
0
    def forward(
        self,
        inputs: Dict[str, flow.Tensor],
    ) -> Dict[str, flow.Tensor]:
        """Run embeddings, encoder and pooler on a dict of model inputs.

        Reads the keys ``"input_ids"``, ``"attention_mask"``,
        ``"token_type_ids"`` and ``"position_ids"`` from ``inputs`` (missing
        keys yield ``None``).  When no attention mask is supplied an all-ones
        mask shaped like ``input_ids`` is used.

        Returns:
            A dict with ``"encoder_output"`` and ``"pooled_output"``.
        """
        input_ids = inputs.get("input_ids")
        mask = inputs.get("attention_mask")
        segment_ids = inputs.get("token_type_ids")
        positions = inputs.get("position_ids")

        embedded = self.embeddings(input_ids, segment_ids, positions)

        # Default: attend to every token.
        if mask is None:
            mask = flow.ones_like(input_ids, device=input_ids.device)
        extended_mask = self.get_extended_attention_mask(mask, input_ids)

        # The second encoder result (attention output) is not returned.
        encoded, _ = self.encoder(embedded, extended_mask)
        pooled = self.pooler(encoded)

        return {
            "encoder_output": encoded,
            "pooled_output": pooled,
        }
Beispiel #5
0
def _test_autograd_backward(test_case, shape, device):
    np_input = np.random.rand(*shape)
    of_input = flow.tensor(np_input,
                           dtype=flow.float32,
                           device=flow.device(device),
                           requires_grad=True)
    of_out = of_input**2
    of_out_sum = of_out.sum()
    of_out_sum.backward()
    test_case.assertTrue(
        np.allclose(of_input.grad.numpy(), np_input * 2, 0.0001, 0.0001))
    of_input = flow.tensor(np_input,
                           dtype=flow.float32,
                           device=flow.device(device),
                           requires_grad=True)
    of_out = of_input**2
    of_out_sum = of_out.sum()
    of_out_sum.backward(flow.ones_like(of_out_sum) * 3)
    test_case.assertTrue(
        np.allclose(of_input.grad.numpy(), np_input * 6, 0.0001, 0.0001))
    of_input = flow.tensor(np_input,
                           dtype=flow.float32,
                           device=flow.device(device),
                           requires_grad=True)
    of_out = of_input**2
    of_out_sum = of_out.sum()
    of_out_sum.backward(retain_graph=True)
    of_out_sum.backward(retain_graph=True)
    test_case.assertTrue(
        np.allclose(of_input.grad.numpy(), np_input * 4, 0.0001, 0.0001))
Beispiel #6
0
 def build(self, inputs, targets):
     """Compute the batch-hard triplet loss for one batch.

     A pairwise distance matrix is built according to ``self.distance``
     ('euclidean' or 'cosine').  For every anchor the largest distance to a
     same-label sample and the smallest distance to a different-label sample
     are collected and handed to ``self.ranking_loss``.

     Args:
         inputs: feature matrix; ``inputs.shape[0]`` is the batch size n.
         targets: labels, one per sample.
             # assumes a 1-D array of length n -- TODO confirm against caller

     Returns:
         ``self.ranking_loss(dist_an, dist_ap, y, margin=self.margin)``.

     Raises:
         ValueError: if ``self.distance`` is neither 'euclidean' nor 'cosine'.
     """
     n = inputs.shape[0]
     if self.distance == 'euclidean':
         # ||a||^2 as an (n, 1) column, repeated across n columns -> (n, n).
         dist = flow.math.pow(inputs, 2)
         dist = flow.math.reduce_sum(dist, axis=1, keepdims=True)
         dist = np.tile(dist, (1, n))
         # dist[i][j] = ||x_i||^2 + ||x_j||^2
         dist = dist + flow.transpose(dist, perm=(1, 0))
         inputs_t = flow.transpose(inputs, perm=(1, 0))
         # dist <- 1 * dist + (-2) * inputs @ inputs^T
         dist = addmm(dist, inputs, inputs_t, beta=1, alpha=-2)
         # Clamp the distance matrix itself before the square root.
         dist = flow.clamp(dist, min_value=1e-12)
         dist = flow.math.sqrt(dist)
     elif self.distance == 'cosine':
         # Row-wise L2 norm, repeated across the feature axis so every
         # feature vector is divided by its own norm.
         fnorm = np.linalg.norm(inputs, ord=2, axis=1, keepdims=True)
         l2norm = inputs / np.tile(fnorm, (1, inputs.shape[1]))
         l2norm_t = flow.transpose(l2norm, perm=(1, 0))
         dist = -np.matmul(l2norm, l2norm_t)
     else:
         raise ValueError(
             "Unsupported distance: {!r}".format(self.distance))

     # mask[i][j] is true when samples i and j share a label.
     target_expand = np.tile(targets, (n, 1))
     target_expand_t = flow.transpose(target_expand, perm=(1, 0))
     mask = flow.math.equal(target_expand, target_expand_t)

     dist_ap, dist_an = [], []
     for i in range(n):
         # Hardest positive: farthest same-label sample.
         temp = np.ndarray.max(dist[i][mask[i]])
         temp = flow.expand_dims(temp, axis=0)
         dist_ap.append(temp)
         # Hardest negative: closest different-label sample.
         temp = np.ndarray.min(dist[i][mask[i] == 0])
         temp = flow.expand_dims(temp, axis=0)
         dist_an.append(temp)
     # Concatenate once, after all anchors were processed; doing this inside
     # the loop would replace the lists and break the next append().
     dist_ap = flow.concat(dist_ap, 0)
     dist_an = flow.concat(dist_an, 0)

     y = flow.ones_like(dist_an)
     loss = self.ranking_loss(dist_an, dist_ap, y, margin=self.margin)
     return loss
Beispiel #7
0
        def test_discriminator(
            z: oft.Numpy.Placeholder((self.batch_size, 100)),
            images: oft.Numpy.Placeholder((self.batch_size, 1, 28, 28)),
            label1: oft.Numpy.Placeholder((self.batch_size, 1)),
            label0: oft.Numpy.Placeholder((self.batch_size, 1)),
        ):
            """Build the discriminator loss and minimise it with plain SGD.

            ``label1`` and ``label0`` are accepted but not read; targets come
            from ``ones_like`` / ``zeros_like``.  Returns the summed loss.
            """
            bce_with_logits = flow.nn.sigmoid_cross_entropy_with_logits

            # Generated samples -> target 0.
            fake = self.generator(z, trainable=False, const_init=True)
            fake_logits = self.discriminator(
                fake, trainable=True, const_init=True
            )
            fake_term = bce_with_logits(
                flow.zeros_like(fake_logits),
                fake_logits,
                name="Dloss_fake_sigmoid_cross_entropy_with_logits",
            )

            # Real images -> target 1, sharing discriminator variables.
            real_logits = self.discriminator(
                images, trainable=True, reuse=True, const_init=True
            )
            real_term = bce_with_logits(
                flow.ones_like(real_logits),
                real_logits,
                name="Dloss_real_sigmoid_cross_entropy_with_logits",
            )

            total = fake_term + real_term

            # Constant learning rate (single piece), no momentum.
            schedule = flow.optimizer.PiecewiseConstantScheduler([], [self.lr])
            flow.optimizer.SGD(schedule, momentum=0).minimize(total)

            return total
Beispiel #8
0
def _test_ones_like_int(test_case, shape, device):
    x = flow.tensor(np.random.randn(*shape), dtype=flow.int, device=flow.device(device))
    y = flow.ones_like(x)
    test_case.assertTrue(y.dtype is flow.int)
    test_case.assertTrue(y.shape == x.shape)
    test_case.assertTrue(y.device == x.device)
    y_numpy = np.ones_like(x.numpy())
    test_case.assertTrue(np.array_equal(y.numpy(), y_numpy))
Beispiel #9
0
    def build(self, inputs, targets):
        """
        Build the batch-hard triplet loss graph.

        Args:
            inputs: feature matrix with shape (batch_size, feat_dim).
            targets: ground truth labels; they are broadcast along axis 1
                against a (batch_size, batch_size) tensor.

        Returns:
            ``self._MarginRankingLoss(dist_an, dist_ap, y)`` where ``y`` is
            all ones.
        """
        n = inputs.shape[0]

        def _swap_axes(blob):
            # 2-D transpose used throughout this method.
            return flow.transpose(blob, perm=(1, 0),
                                  batch_axis_non_change=True)

        def _row(blob, index):
            # Slice out row ``index`` (keeps the leading axis).
            return flow.slice_v2(blob, [(index, index + 1, 1)])

        # Squared L2 norm of every feature vector.
        squared = math.pow(
            inputs, flow.constant_like(inputs, 2, dtype=flow.float32))
        dist = math.reduce_sum(squared, axis=1)

        # (n, n) template that fixes the broadcast target shape.
        shape_tensor = flow.constant(value=0.0,
                                     dtype=flow.float32,
                                     shape=(n, n))

        def _tiled_targets():
            return flow.broadcast_like(targets, like=shape_tensor,
                                       broadcast_axes=[1])

        # Pairwise distance: ||a||^2 + ||b||^2 - 2 * a.b, clamped, then sqrt.
        dist = flow.broadcast_like(dist, like=shape_tensor, broadcast_axes=[1])
        dist = math.add(dist, _swap_axes(dist))
        gram_term = math.multiply(
            -2, flow.matmul(inputs, _swap_axes(inputs)))
        dist = math.add(dist, gram_term)
        dist = math.sqrt(flow.clamp(dist, min_value=1e-12))

        # Same-label and different-label masks.
        mask = math.equal(_tiled_targets(), _swap_axes(_tiled_targets()))
        mask_rev = math.not_equal(_tiled_targets(),
                                  _swap_axes(_tiled_targets()))

        # Per anchor: hardest positive (max) and hardest negative (min).
        dist_ap, dist_an = [], []
        for i in range(n):
            row_dist = _row(dist, i)
            row_mask = _row(mask, i)
            row_mask_rev = _row(mask_rev, i)
            positives = flow.gather_nd(row_dist, flow.where(row_mask))
            dist_ap.append(math.reduce_max(positives))
            negatives = flow.gather_nd(row_dist, flow.where(row_mask_rev))
            dist_an.append(math.reduce_min(negatives))
        dist_ap = flow.concat(dist_ap, 0)
        dist_an = flow.concat(dist_an, 0)
        y = flow.ones_like(dist_an)

        return self._MarginRankingLoss(dist_an, dist_ap, y)
Beispiel #10
0
    def forward(self, predicted, target):
        """Additive-margin softmax loss.

        ``predicted`` is divided by its norm along dim 0 (plus
        ``self.epsilon``), the margin ``self.m`` is subtracted from the entry
        of the target class, everything is scaled by ``self.s`` and the mean
        negative log-likelihood is returned.
        """
        # ------------ AM Softmax ------------ #
        normed = predicted / (predicted.norm(dim=0) + self.epsilon)
        rows = flow.Tensor(range(normed.size(0))).long().to(normed.device)

        # Score of the ground-truth class for every sample.
        target_cos = normed[rows, target]
        margin_cos = target_cos - self.m

        # e ** x is spelled as (ones * e) ** x, as in the original code.
        target_exp = (flow.ones_like(margin_cos) * np.e)**(self.s *
                                                           margin_cos)
        all_exp = ((flow.ones_like(normed) * np.e)**(normed *
                                                     self.s)).sum(dim=1)
        own_exp = (flow.ones_like(target_cos) * np.e)**(target_cos * self.s)
        # Sum over all non-target classes.
        others_exp = all_exp - own_exp

        ratio = target_exp / (target_exp + others_exp + self.epsilon)
        return -flow.log(ratio).mean()
Beispiel #11
0
 def train_generator(z=flow.FixedTensorDef((self.batch_size,
                                            self.z_dim)), ):
     """Build the generator loss, register it, and return ``(g_loss, g_out)``.

     The generated samples are scored by the (frozen) discriminator against
     all-ones labels.
     """
     fake = self.generator(z, trainable=True)
     fake_logits = self.discriminator(fake, trainable=False)
     real_labels = flow.ones_like(fake_logits)
     gen_loss = flow.nn.sigmoid_cross_entropy_with_logits(
         real_labels,
         fake_logits,
         name="Gloss_sigmoid_cross_entropy_with_logits")
     flow.losses.add_loss(gen_loss)
     return gen_loss, fake
def _test_ones_like_int(test_case, placement, sbp, shape, device):
    """Check ``flow.ones_like`` on a global int tensor: dtype, shape and
    placement are kept and every element equals one."""
    local = flow.tensor(
        np.random.randn(*shape),
        dtype=flow.int,
        device=flow.device(device),
    )
    # Promote the local tensor to a global one before calling ones_like.
    source = local.to_global(placement=placement, sbp=sbp)
    ones = flow.ones_like(source)

    test_case.assertTrue(ones.dtype is flow.int)
    test_case.assertTrue(ones.shape == source.shape)
    test_case.assertTrue(ones.placement == placement)

    expected = np.ones(source.numpy().shape)
    test_case.assertTrue(np.array_equal(ones.numpy(), expected))
Beispiel #13
0
        def test_generator(
            z: oft.Numpy.Placeholder((self.batch_size, self.z_dim)),
            label1: oft.Numpy.Placeholder((self.batch_size, 1)),
        ):
            """Build the generator loss and minimise it with plain SGD.

            ``label1`` is accepted but not read; the all-ones target is
            produced with ``flow.ones_like``.  Returns the generator loss.
            """
            fake = self.generator(z, trainable=True, const_init=True)
            fake_logits = self.discriminator(
                fake, trainable=False, const_init=True
            )
            gen_loss = flow.nn.sigmoid_cross_entropy_with_logits(
                flow.ones_like(fake_logits),
                fake_logits,
                name="Gloss_sigmoid_cross_entropy_with_logits",
            )

            # Constant learning rate (single piece), no momentum.
            schedule = flow.optimizer.PiecewiseConstantScheduler([], [self.lr])
            flow.optimizer.SGD(schedule, momentum=0).minimize(gen_loss)
            return gen_loss
Beispiel #14
0
    def get_target_tensor(self, prediction, target_is_real):
        """Create a label tensor shaped like ``prediction``.

        Parameters:
            prediction (tensor) - - typically the prediction from a discriminator
            target_is_real (bool) - - whether the ground truth label is for real images or fake images

        Returns:
            A tensor of ones when ``target_is_real`` is truthy, otherwise a
            tensor of zeros, created with ``ones_like`` / ``zeros_like``.
        """
        make_labels = flow.ones_like if target_is_real else flow.zeros_like
        return make_labels(prediction)
Beispiel #15
0
def _test_autograd_grad(test_case, shape, device):
    np_input = np.random.rand(*shape)
    of_input = flow.tensor(np_input,
                           dtype=flow.float32,
                           device=flow.device(device),
                           requires_grad=True)
    of_out = of_input**2
    of_out_sum = of_out.sum()
    grad = flow.autograd.grad(of_out_sum, of_input)[0]
    test_case.assertTrue(of_input.grad is None)
    test_case.assertTrue(
        np.allclose(grad.numpy(), np_input * 2, 0.0001, 0.0001))
    of_input = flow.tensor(np_input,
                           dtype=flow.float32,
                           device=flow.device(device),
                           requires_grad=True)
    of_out = of_input**2
    of_out_sum = of_out.sum()
    grad = flow.autograd.grad(of_out_sum, of_input,
                              flow.ones_like(of_out_sum) * 3)[0]
    test_case.assertTrue(
        np.allclose(grad.numpy(), np_input * 6, 0.0001, 0.0001))
Beispiel #16
0
    def recognize_beam(self, encoder_outputs, char_list, args):
        """
        Beam search that decodes a single utterance.

        Every step re-runs the full decoder stack on each surviving prefix,
        expands it with its ``beam`` best next tokens and keeps the ``beam``
        highest-scoring candidates.  Hypotheses whose last token is
        ``self.eos_id`` are moved to the finished list.

        Args:
            encoder_outputs: encoder output of one utterance, T x H
                (e.g. 418 x 512); a batch axis is added internally.
            char_list: list of characters (e.g. 4233 entries), used only to
                print the running hypotheses.
            args: options object; reads ``args.beam_size``, ``args.nbest``
                and ``args.decode_max_len`` (0 means "use the encoder
                length T").

        Returns:
            nbest_hyps: up to ``args.nbest`` finished hypotheses sorted by
                descending score.  Each one is a dict with the keys
                ``"score"`` and ``"yseq"``; ``"yseq"`` is converted to a
                plain Python list of token ids.
        """
        # search params
        beam = args.beam_size
        nbest = args.nbest
        if args.decode_max_len == 0:
            maxlen = encoder_outputs.size(0)
        else:
            maxlen = args.decode_max_len

        # add the batch axis: (T, H) -> (1, T, H)
        encoder_outputs = encoder_outputs.unsqueeze(0)
        # prepare sos: a (1, 1) long tensor holding self.sos_id
        ys = flow.ones(1, 1).fill_(self.sos_id).type_as(encoder_outputs).long()
        hyp = {"score": 0.0, "yseq": ys}
        hyps = [hyp]
        ended_hyps = []

        for i in range(maxlen):
            hyps_best_kept = []
            for hyp in hyps:
                ys = hyp["yseq"]
                ys = ys.to(device=encoder_outputs.device)
                # -- Prepare masks
                # all-ones pad mask: every position of the prefix is kept
                non_pad_mask = flow.ones_like(ys).to(
                    dtype=flow.float32).unsqueeze(-1)
                slf_attn_mask = get_subsequent_mask(ys)
                # -- Forward
                dec_output = self.dropout(
                    self.tgt_word_emb(ys) * self.x_logit_scale +
                    self.positional_encoding(ys))

                for dec_layer in self.layer_stack:
                    dec_output, _, _ = dec_layer(
                        dec_output,
                        encoder_outputs,
                        non_pad_mask=non_pad_mask,
                        slf_attn_mask=slf_attn_mask,
                        dec_enc_attn_mask=None,
                    )

                # only the last position is needed to predict the next token
                seq_logit = self.tgt_word_prj(dec_output[:, -1])
                # NOTE(review): softmax is called without an explicit dim --
                # confirm the default is the vocabulary axis.
                local_logit = F.softmax(seq_logit)
                local_scores = flow.log(local_logit)
                # topk scores
                local_best_scores, local_best_ids = flow.topk(local_scores,
                                                              beam,
                                                              dim=1)

                # expand the current prefix with each of its top candidates
                for j in range(beam):
                    new_hyp = {}
                    new_hyp["score"] = hyp["score"] + local_best_scores[0, j]
                    # allocate prefix length + 1, then copy the prefix and
                    # write the new token id into the last slot
                    new_hyp["yseq"] = (flow.ones(
                        1, (1 + ys.size(1))).type_as(encoder_outputs).long())
                    new_hyp["yseq"][:, :ys.size(1)] = hyp["yseq"]
                    new_hyp["yseq"][:, ys.size(1)] = int(
                        float(local_best_ids[0, j].numpy()))
                    hyps_best_kept.append(new_hyp)

                # prune to the best `beam` candidates seen so far
                hyps_best_kept = sorted(hyps_best_kept,
                                        key=lambda x: x["score"],
                                        reverse=True)[:beam]
            # end for hyp in hyps
            hyps = hyps_best_kept
            # add eos in the final loop so that at least one hypothesis ends
            if i == maxlen - 1:
                for hyp in hyps:
                    hyp["yseq"] = flow.cat(
                        [
                            hyp["yseq"],
                            flow.ones(1, 1).fill_(
                                self.eos_id).type_as(encoder_outputs).long(),
                        ],
                        dim=1,
                    )

            # add ended hypotheses to a final list, and remove them from the
            # current hypotheses
            # (known limitation: afterwards the number of hyps can be < beam)
            remained_hyps = []
            for hyp in hyps:
                if hyp["yseq"][0, -1] == self.eos_id:
                    ended_hyps.append(hyp)
                else:
                    remained_hyps.append(hyp)

            hyps = remained_hyps
            if len(hyps) > 0:
                print("remeined hypothes: " + str(len(hyps)))
            else:
                print("no hypothesis. Finish decoding.")
                break
            # print every running hypothesis, skipping the leading sos token
            for hyp in hyps:
                print("hypo: " + "".join(
                    [char_list[int(x.numpy())] for x in hyp["yseq"][0, 1:]]))

        # keep the nbest finished hypotheses, best score first
        nbest_hyps = sorted(ended_hyps, key=lambda x: x["score"],
                            reverse=True)[:min(len(ended_hyps), nbest)]
        # convert the token tensors to plain Python lists for the caller
        for hyp in nbest_hyps:
            hyp["yseq"] = hyp["yseq"][0].cpu().numpy().tolist()
        return nbest_hyps
Beispiel #17
0
def Causal_Self_Attention(x, config, name='csa'):
    """
    Causal multi-head self-attention.

    Input::
        x : embedded words input [B, T, C]
            -- B is the batch size
            -- T is the sequence length (block_size)
            -- C is the dimension of the embedding (n_embd)
               C / n_head = dimension of each head
        config: configuration object; reads n_embd, n_head, attn_pdrop and
                resid_pdrop
        name: prefix for the names of the created layers and ops
    Output::
        y : tensor of shape [B, T, C], usable as the next layer's x

    Description::
        Query, key and value are dense projections of x, split into heads.
        Attention scores are scaled by 1 / sqrt(head size), positions above
        the diagonal are filled with -inf, and softmax plus dropout are
        applied before weighting the values.
        Code adapted from: https://github.com/karpathy/minGPT/blob/master/mingpt/model.py
        Theory: http://jalammar.github.io/illustrated-gpt2/
    """
    assert config.n_embd % config.n_head == 0

    B, T, C = x.shape
    n_head = config.n_head
    # One Kaiming initializer shared by all three projections.
    kaiming_init_C = flow.kaiming_initializer(shape=(C, C))

    def _project_to_heads(layer_name):
        # (B, T, C) -> dense -> (B, T, nh, hs) -> (B, nh, T, hs)
        projected = flow.layers.dense(x,
                                      units=config.n_embd,
                                      kernel_initializer=kaiming_init_C,
                                      name=layer_name)
        projected = flow.reshape(projected, [B, T, n_head, C // n_head])
        return flow.transpose(projected, [0, 2, 1, 3])

    # Layer names are kept exactly as before (note: the value layer has no
    # underscore) so existing variables still resolve.
    query = _project_to_heads(name + '_query')
    key = _project_to_heads(name + '_key')
    value = _project_to_heads(name + 'value')

    # Scaled dot product: (B, nh, T, hs) x (B, nh, hs, T) -> (B, nh, T, T)
    scale = 1.0 / math.sqrt(key.shape[-1])
    att = flow.matmul(query, flow.transpose(key, [0, 1, 3, 2])) * scale

    # Build the causal mask: tril of -1 plus 1 gives 0 on/below the diagonal
    # and 1 above it; the entries equal to 1 are filled with -inf.
    minus_ones = flow.constant(value=int(-1),
                               dtype=flow.int32,
                               shape=(B, n_head, T, T),
                               name=name + "_ConstantLike_tril")
    att_tril = flow.math.tril(minus_ones)
    att_tril = att_tril + flow.ones_like(like=att_tril, dtype=flow.int32)
    att = flow.masked_fill(att, att_tril, float('-inf'))
    att = flow.nn.softmax(att, name=name + 'att')
    att = flow.nn.dropout(att, config.attn_pdrop)

    # Weighted values: (B, nh, T, T) x (B, nh, T, hs) -> (B, nh, T, hs),
    # then merge the heads back into (B, T, C).
    y = flow.matmul(att, value)
    y = flow.reshape(flow.transpose(y, [0, 2, 1, 3]), [B, T, C])
    return flow.nn.dropout(y, config.resid_pdrop)
Beispiel #18
0
    def validation_for_B_dir(self):
        """Convert every file in ``self.validation_B_dir`` with ``generator_B2A``.

        Each file is loaded at 22050 Hz, decomposed with the WORLD helpers in
        ``preprocess``, its spectral envelope is normalised, converted by the
        generator, de-normalised, re-synthesised and written to
        ``self.output_B_dir`` as ``convert_<original name>``.
        """
        num_mcep = 80
        sampling_rate = 22050
        frame_period = 5.0
        src_dir = self.validation_B_dir
        dst_dir = self.output_B_dir

        os.makedirs(dst_dir, exist_ok=True)

        print("Generating Validation Data A from B...")
        for fname in os.listdir(src_dir):
            # --- load and analyse -------------------------------------------
            wav, _ = librosa.load(os.path.join(src_dir, fname),
                                  sr=sampling_rate,
                                  mono=True)
            wav = preprocess.wav_padding(wav=wav,
                                         sr=sampling_rate,
                                         frame_period=frame_period,
                                         multiple=4)
            f0, _, sp, ap = preprocess.world_decompose(
                wav=wav, fs=sampling_rate, frame_period=frame_period)

            # NOTE(review): the same dataset_*_mean/std attributes are used for
            # pitch conversion and for spectral normalisation -- confirm this
            # is intended.
            f0_converted = preprocess.pitch_conversion(
                f0=f0,
                mean_log_src=self.dataset_B_mean,
                std_log_src=self.dataset_B_std,
                mean_log_target=self.dataset_A_mean,
                std_log_target=self.dataset_A_std,
            )

            # --- normalise the coded spectral envelope -----------------------
            coded_sp = preprocess.world_encode_spectral_envelop(
                sp=sp, fs=sampling_rate, dim=num_mcep)
            normed = (coded_sp.T - self.dataset_B_mean) / self.dataset_B_std
            # Wrap in a list to add a leading batch axis of size 1.
            net_input = flow.tensor(np.array([normed]))
            if flow.cuda.is_available():
                net_input = net_input.cuda()
            net_input = net_input.float()

            # --- convert (all-ones mask) and de-normalise --------------------
            converted = self.generator_B2A(net_input,
                                           flow.ones_like(net_input))
            converted = np.squeeze(converted.cpu().detach().numpy())
            converted = converted * self.dataset_A_std + self.dataset_A_mean
            converted = np.ascontiguousarray(converted.T).astype(np.double)
            decoded_sp = preprocess.world_decode_spectral_envelop(
                coded_sp=converted, fs=sampling_rate)

            # --- synthesise and save -----------------------------------------
            wav_transformed = preprocess.world_speech_synthesis(
                f0=f0_converted[0],
                decoded_sp=decoded_sp,
                ap=ap,
                fs=sampling_rate,
                frame_period=frame_period,
            )
            out_path = os.path.join(dst_dir,
                                    "convert_" + os.path.basename(fname))
            sf.write(out_path, wav_transformed, sampling_rate)
Beispiel #19
0
    def infer(self):
        """Run the inference loop for MaskCycleGAN-VC.

        Loads ``self.pretrain_models``, then converts every file found in
        ``self.infer_data_dir`` with ``generator_A2B`` and writes the result
        into that same directory as ``convert_<original name>``.
        """
        # load pretrain models
        self.loadModel(self.pretrain_models)

        num_mcep = 80
        sampling_rate = self.sample_rate
        frame_period = 5.0
        data_dir = self.infer_data_dir

        print("Generating Validation Data B from A...")
        for fname in os.listdir(data_dir):
            # --- load and analyse -------------------------------------------
            wav, _ = librosa.load(os.path.join(data_dir, fname),
                                  sr=sampling_rate,
                                  mono=True)
            wav = preprocess.wav_padding(wav=wav,
                                         sr=sampling_rate,
                                         frame_period=frame_period,
                                         multiple=4)
            f0, _, sp, ap = preprocess.world_decompose(
                wav=wav, fs=sampling_rate, frame_period=frame_period)

            # NOTE(review): the same dataset_*_mean/std attributes are used for
            # pitch conversion and for spectral normalisation -- confirm this
            # is intended.
            f0_converted = preprocess.pitch_conversion(
                f0=f0,
                mean_log_src=self.dataset_A_mean,
                std_log_src=self.dataset_A_std,
                mean_log_target=self.dataset_B_mean,
                std_log_target=self.dataset_B_std,
            )

            # --- normalise the coded spectral envelope -----------------------
            coded_sp = preprocess.world_encode_spectral_envelop(
                sp=sp, fs=sampling_rate, dim=num_mcep)
            normed = (coded_sp.T - self.dataset_A_mean) / self.dataset_A_std
            # Wrap in a list to add a leading batch axis of size 1.
            net_input = flow.tensor(np.array([normed]))
            if flow.cuda.is_available():
                net_input = net_input.cuda()
            net_input = net_input.float()

            # --- convert (all-ones mask) and de-normalise --------------------
            converted = self.generator_A2B(net_input,
                                           flow.ones_like(net_input))
            converted = np.squeeze(converted.cpu().detach().numpy())
            converted = converted * self.dataset_B_std + self.dataset_B_mean
            converted = np.ascontiguousarray(converted.T).astype(np.double)
            decoded_sp = preprocess.world_decode_spectral_envelop(
                coded_sp=converted, fs=sampling_rate)

            # --- synthesise and save -----------------------------------------
            wav_transformed = preprocess.world_speech_synthesis(
                f0=f0_converted[0],
                decoded_sp=decoded_sp,
                ap=ap,
                fs=sampling_rate,
                frame_period=frame_period,
            )
            out_path = os.path.join(data_dir,
                                    "convert_" + os.path.basename(fname))
            sf.write(out_path, wav_transformed, sampling_rate)
Beispiel #20
0
    def train(self):
        """Run the adversarial voice-conversion training loop.

        For every iteration in ``range(0, self.num_iters)`` this method:

        1. fetches a batch ``(x_real, speaker_idx_org, label_org)`` from
           ``self.data_loader`` (restarting the loader when it is exhausted)
           and derives target labels by permuting the batch;
        2. updates the classifier ``self.C`` on the real frames;
        3. updates the discriminator ``self.D``;
        4. every ``self.n_critic`` iterations, updates the generator
           ``self.G``;
        5. depending on ``self.log_step``, ``self.sample_step``,
           ``self.model_save_step`` and ``self.lr_update_step``: prints the
           losses, writes converted sample audio into ``self.sample_dir``,
           saves the G/D/C state dicts into ``self.model_save_dir`` and
           decays the learning rates by a constant amount.

        Returns:
            None.
        """
        # Learning rate cache for decaying.
        g_lr = self.g_lr
        d_lr = self.d_lr
        c_lr = self.c_lr

        start_iters = 0
        if self.resume_iters:
            # NOTE(review): resuming is not implemented here; training starts
            # at iteration 0 even when `resume_iters` is set.
            pass

        norm = Normalizer()
        data_iter = iter(self.data_loader)

        # Build the loss modules once instead of on every iteration; they are
        # only ever called as plain functions of (input, target) below.
        ce_loss = nn.CrossEntropyLoss()
        bce_loss = nn.BCEWithLogitsLoss()
        l1_loss = nn.L1Loss()

        print("Start training......")
        start_time = datetime.now()

        for i in range(start_iters, self.num_iters):
            # Preprocess input data
            # Fetch real frames and labels; restart the loader once it is
            # exhausted. Only StopIteration is caught so that genuine loader
            # errors and KeyboardInterrupt are not silently swallowed.
            try:
                x_real, speaker_idx_org, label_org = next(data_iter)
            except StopIteration:
                data_iter = iter(self.data_loader)
                x_real, speaker_idx_org, label_org = next(data_iter)

            # Generate target domain labels randomly by shuffling the batch.
            rand_idx = flow.randperm(label_org.size(0))
            label_trg = label_org[rand_idx]
            speaker_idx_trg = speaker_idx_org[rand_idx]

            x_real = x_real.to(self.device)
            # Original domain one-hot labels.
            label_org = label_org.to(self.device)
            # Target domain one-hot labels.
            label_trg = label_trg.to(self.device)
            speaker_idx_org = speaker_idx_org.to(self.device)
            speaker_idx_trg = speaker_idx_trg.to(self.device)

            # Train the classifier on real audio frames.
            cls_real = self.C(x_real)
            cls_loss_real = ce_loss(input=cls_real, target=speaker_idx_org)

            self.reset_grad()
            cls_loss_real.backward()
            self.c_optimizer.step()
            # Logging.
            loss = {}
            loss["C/C_loss"] = cls_loss_real.item()

            # Train the discriminator
            # Compute loss with real audio frame.
            out_r = self.D(x_real, label_org)
            # Compute loss with fake audio frame (generator output detached so
            # this loss does not back-propagate into G).
            x_fake = self.G(x_real, label_trg)
            out_f = self.D(x_fake.detach(), label_trg)
            d_loss_t = bce_loss(
                input=out_f, target=flow.zeros_like(out_f).float()) + bce_loss(
                    input=out_r, target=flow.ones_like(out_r).float())

            out_cls = self.C(x_fake)
            d_loss_cls = ce_loss(input=out_cls, target=speaker_idx_trg)

            # Compute loss for gradient penalty on a random interpolation
            # between real and fake frames.
            alpha = flow.rand(x_real.size(0), 1, 1, 1).to(self.device)
            x_hat = ((alpha * x_real +
                      (1 - alpha) * x_fake).detach().requires_grad_(True))
            out_src = self.D(x_hat, label_trg)

            # TODO: Second-order derivation is not currently supported in oneflow, so gradient penalty cannot be used temporarily.
            if self.use_gradient_penalty:
                d_loss_gp = self.gradient_penalty(out_src, x_hat)
                d_loss = d_loss_t + self.lambda_cls * d_loss_cls + 5 * d_loss_gp
            else:
                d_loss = d_loss_t + self.lambda_cls * d_loss_cls

            self.reset_grad()
            d_loss.backward()
            self.d_optimizer.step()

            loss["D/D_loss"] = d_loss.item()

            # Train the generator
            if (i + 1) % self.n_critic == 0:
                # Original-to-target domain.
                x_fake = self.G(x_real, label_trg)
                g_out_src = self.D(x_fake, label_trg)
                g_loss_fake = bce_loss(
                    input=g_out_src, target=flow.ones_like(g_out_src).float())

                # NOTE(review): this classification loss is computed on
                # x_real, so it does not depend on the generator output and
                # contributes no gradient to G. It looks like x_fake /
                # speaker_idx_trg was intended -- confirm before changing,
                # since that would alter training behaviour.
                out_cls = self.C(x_real)
                g_loss_cls = ce_loss(input=out_cls, target=speaker_idx_org)

                # Target-to-original domain.
                x_reconst = self.G(x_fake, label_org)
                g_loss_rec = l1_loss(x_reconst, x_real)

                # Original-to-Original domain(identity).
                x_fake_iden = self.G(x_real, label_org)
                id_loss = l1_loss(x_fake_iden, x_real)

                # Backward and optimize.
                g_loss = (g_loss_fake + self.lambda_cycle * g_loss_rec +
                          self.lambda_cls * g_loss_cls +
                          self.lambda_identity * id_loss)

                self.reset_grad()
                g_loss.backward()
                self.g_optimizer.step()

                # Logging.
                loss["G/loss_fake"] = g_loss_fake.item()
                loss["G/loss_rec"] = g_loss_rec.item()
                loss["G/loss_cls"] = g_loss_cls.item()
                loss["G/loss_id"] = id_loss.item()
                loss["G/g_loss"] = g_loss.item()

            # Miscellaneous
            # Print out training information.
            if (i + 1) % self.log_step == 0:
                et = datetime.now() - start_time
                # Drop the trailing ".ffffff" microseconds from the timedelta.
                et = str(et)[:-7]
                log = "Elapsed [{}], Iteration [{}/{}]".format(
                    et, i + 1, self.num_iters)
                for tag, value in loss.items():
                    log += ", {}: {:.4f}".format(tag, value)
                print(log)

            # Convert the test utterances to a randomly chosen other speaker
            # and write the result to disk for debugging.
            if (i + 1) % self.sample_step == 0:
                with flow.no_grad():
                    d, speaker = TestSet(self.test_dir).test_data()
                    target = random.choice(
                        [x for x in speakers if x != speaker])
                    label_t = self.spk_enc.transform([target])[0]
                    label_t = np.asarray([label_t])
                    # The target label is the same for every segment, so the
                    # tensor is created once rather than per segment.
                    label_tensor = flow.Tensor(label_t).to(self.device)

                    for filename, content in d.items():
                        f0 = content["f0"]
                        ap = content["ap"]
                        sp_norm_pad = self.pad_coded_sp(
                            content["coded_sp_norm"])

                        # Convert the padded features in non-overlapping
                        # windows of FRAMES columns.
                        convert_result = []
                        for start_idx in range(
                                0, sp_norm_pad.shape[1] - FRAMES + 1, FRAMES):
                            one_seg = sp_norm_pad[:,
                                                  start_idx:start_idx + FRAMES]

                            one_seg = flow.Tensor(one_seg).to(self.device)
                            one_seg = one_seg.view(1, 1, one_seg.size(0),
                                                   one_seg.size(1))
                            one_set_return = self.G(
                                one_seg, label_tensor).detach().cpu().numpy()
                            one_set_return = np.squeeze(one_set_return)
                            one_set_return = norm.backward_process(
                                one_set_return, target)
                            convert_result.append(one_set_return)

                        convert_con = np.concatenate(convert_result, axis=1)
                        # Trim the padding back to the original frame count.
                        convert_con = convert_con[:,
                                                  0:content["coded_sp_norm"].
                                                  shape[1]]
                        contigu = np.ascontiguousarray(convert_con.T,
                                                       dtype=np.float64)
                        decoded_sp = decode_spectral_envelope(contigu,
                                                              SAMPLE_RATE,
                                                              fft_size=FFTSIZE)
                        f0_converted = norm.pitch_conversion(
                            f0, speaker, target)
                        wav = synthesize(f0_converted, decoded_sp, ap,
                                         SAMPLE_RATE)

                        name = f"{speaker}-{target}_iter{i+1}_{filename}"
                        path = os.path.join(self.sample_dir, name)
                        print(f"[save]:{path}")
                        sf.write(path, wav, SAMPLE_RATE)

            # Save model checkpoints.
            if (i + 1) % self.model_save_step == 0:
                G_path = os.path.join(self.model_save_dir,
                                      "{}-G".format(i + 1))
                D_path = os.path.join(self.model_save_dir,
                                      "{}-D".format(i + 1))
                C_path = os.path.join(self.model_save_dir,
                                      "{}-C".format(i + 1))
                flow.save(self.G.state_dict(), G_path)
                flow.save(self.D.state_dict(), D_path)
                flow.save(self.C.state_dict(), C_path)
                print("Saved model checkpoints into {}...".format(
                    self.model_save_dir))

            # Decay learning rates, but only during the last
            # `num_iters_decay` iterations of training.
            if (i + 1) % self.lr_update_step == 0 and (i + 1) > (
                    self.num_iters - self.num_iters_decay):
                g_lr -= self.g_lr / float(self.num_iters_decay)
                d_lr -= self.d_lr / float(self.num_iters_decay)
                c_lr -= self.c_lr / float(self.num_iters_decay)
                self.update_lr(g_lr, d_lr, c_lr)
                print("Decayed learning rates, g_lr: {}, d_lr: {}.".format(
                    g_lr, d_lr))
Beispiel #21
0
    def forward(self, inputs, targets):
        """Compute a batch-hard triplet ranking loss.

        Builds an (n, n) pairwise distance blob over the rows of ``inputs``
        using the metric selected by ``self.distance``. For every anchor row
        it then takes the largest distance to a sample with the same label
        and the smallest distance to a sample with a different label, and
        passes both to ``self._MarginRankingLoss``.

        Args:
            inputs: feature blob; ``inputs.shape[0]`` is used as the batch
                size n. Presumably shaped (n, feat_dim) -- TODO confirm.
            targets: label blob that is broadcast along axis 1 to (n, n).
                Presumably shaped (n, 1) -- TODO confirm.

        Returns:
            The result of ``self._MarginRankingLoss(dist_an, dist_ap, y)``
            where ``y`` is all ones.

        Raises:
            ValueError: if ``self.distance`` is neither ``'euclidean'`` nor
                ``'cosine'``.
        """
        # Fail early with a clear message; previously an unknown metric fell
        # through both branches and crashed later on an unbound `dist`.
        if self.distance not in ('euclidean', 'cosine'):
            raise ValueError(
                "Unsupported distance {!r}; expected 'euclidean' or "
                "'cosine'.".format(self.distance))

        n = inputs.shape[0]
        # Compute pairwise distance, replace by the official when merged
        # A timestamp makes the variable name below unique per call.
        tempname = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S.%f')
        # (n, n) template used as the `like` argument of broadcast_like.
        shape_tensor = flow.constant(value=0.0,
                                     dtype=flow.float32,
                                     shape=(n, n))
        if self.distance == 'euclidean':
            # Exponent blob filled with 2 so that pow() squares every element.
            blob_2 = flow.get_variable(
                "blob_2_" + tempname,
                shape=inputs.shape,
                initializer=flow.constant_initializer(2),
                dtype=inputs.dtype)
            dist = flow.math.pow(inputs, blob_2)

            # Row-wise squared norms, broadcast to (n, n) and symmetrised:
            # dist[i][j] = |x_i|^2 + |x_j|^2.
            dist = flow.math.reduce_sum(dist, axis=1, keepdims=True)
            dist = flow.broadcast_like(dist, shape_tensor)
            tempdist = flow.transpose(dist)
            dist = dist + tempdist
            inputs_t = flow.transpose(inputs)
            # Presumably dist - 2 * inputs @ inputs^T, given beta=1 and
            # alpha=-2 -- confirm against the `addmm` helper.
            dist = addmm(dist, inputs, inputs_t, beta=1, alpha=-2)
            # Clamp before sqrt for numerical stability.
            dist = flow.clamp(dist, min_value=1e-12)
            dist = flow.math.sqrt(dist)
        else:  # 'cosine'
            # Per-row mean of inputs / l2_normalize(inputs); presumably this
            # recovers each row's L2 norm -- TODO confirm.
            fnorm = flow.math.reduce_mean(flow.math.divide(
                inputs, flow.math.l2_normalize(inputs, axis=1)),
                                          axis=1,
                                          keepdims=True)

            expand_fnorm = flow.broadcast_like(fnorm,
                                               like=inputs,
                                               broadcast_axes=[1])
            l2norm = flow.math.divide(inputs, expand_fnorm)
            l2norm_t = flow.transpose(l2norm, perm=(1, 0))
            # Negated similarity so that smaller values mean "closer".
            dist = flow.math.negative(flow.matmul(l2norm, l2norm_t))

        # For each anchor, find the hardest positive and negative
        # The broadcast labels and their transpose are built once and shared
        # by both masks.
        targets_row = flow.broadcast_like(targets,
                                          like=shape_tensor,
                                          broadcast_axes=[1])
        targets_col = flow.transpose(targets_row,
                                     perm=(1, 0),
                                     batch_axis_non_change=True)
        # mask[i][j] is true when samples i and j share a label.
        mask = math.equal(targets_row, targets_col)
        mask_rev = math.not_equal(targets_row, targets_col)

        dist_ap, dist_an = [], []
        for i in range(n):
            row_dist = flow.slice_v2(dist, [(i, i + 1, 1)])
            row_mask = flow.slice_v2(mask, [(i, i + 1, 1)])
            row_mask_rev = flow.slice_v2(mask_rev, [(i, i + 1, 1)])
            # Hardest positive: farthest sample with the same label.
            hardest_pos = flow.expand_dims(
                math.reduce_max(
                    flow.gather_nd(row_dist, flow.where(row_mask))), 0)
            # Hardest negative: closest sample with a different label.
            hardest_neg = flow.expand_dims(
                math.reduce_min(
                    flow.gather_nd(row_dist, flow.where(row_mask_rev))), 0)
            dist_ap.append(hardest_pos)
            dist_an.append(hardest_neg)
        dist_ap = flow.concat(dist_ap, 0)
        dist_an = flow.concat(dist_an, 0)
        y = flow.ones_like(dist_an)
        return self._MarginRankingLoss(dist_an, dist_ap, y)
Beispiel #22
0
    def train(self):
        """Implements the training loop for MaskCycleGAN-VC.

        For every epoch in ``range(self.start_epoch, self.num_epochs + 1)``
        and every batch ``(real_A, mask_A, real_B, mask_B)`` from
        ``self.train_dataloader`` this method performs one generator update
        followed by one discriminator update, both using squared-error
        adversarial losses, and prints the losses every second batch.

        After an epoch, when ``epoch`` is a non-zero multiple of
        ``self.epochs_per_save``, it saves a checkpoint and runs validation
        for both directions.

        Side effects visible here: ``self.identity_loss_lambda`` is set to 0
        once the iteration counter exceeds 10000, and the learning rates are
        adjusted through ``self.adjust_lr_rate`` once it exceeds
        ``self.decay_after``.

        Returns:
            None.
        """
        for epoch in range(self.start_epoch, self.num_epochs + 1):

            for i, (real_A, mask_A, real_B,
                    mask_B) in enumerate(self.train_dataloader):
                # Global iteration counter derived from the epoch index and
                # the number of batches per epoch.
                num_iterations = (self.n_samples //
                                  self.mini_batch_size) * epoch + i
                # Identity loss is switched off after a hard-coded number of
                # iterations.
                if num_iterations > 10000:
                    self.identity_loss_lambda = 0
                if num_iterations > self.decay_after:
                    self.adjust_lr_rate(self.generator_optimizer,
                                        generator=True)
                    # Decay the discriminator with its own optimizer; this
                    # call previously passed the generator optimizer again.
                    self.adjust_lr_rate(self.discriminator_optimizer,
                                        generator=False)

                real_A = real_A.to(self.device, dtype=flow.float)
                mask_A = mask_A.to(self.device, dtype=flow.float)
                real_B = real_B.to(self.device, dtype=flow.float)
                mask_B = mask_B.to(self.device, dtype=flow.float)

                # Train Generator
                self.generator_A2B.train()
                self.generator_B2A.train()
                self.discriminator_A.eval()
                self.discriminator_B.eval()
                self.discriminator_A2.eval()
                self.discriminator_B2.eval()

                # Generator Feed Forward
                # Cycle and identity passes use an all-ones mask.
                fake_B = self.generator_A2B(real_A, mask_A)
                cycle_A = self.generator_B2A(fake_B, flow.ones_like(fake_B))
                fake_A = self.generator_B2A(real_B, mask_B)
                cycle_B = self.generator_A2B(fake_A, flow.ones_like(fake_A))
                identity_A = self.generator_B2A(real_A, flow.ones_like(real_A))
                identity_B = self.generator_A2B(real_B, flow.ones_like(real_B))
                d_fake_A = self.discriminator_A(fake_A)
                d_fake_B = self.discriminator_B(fake_B)

                # For Two Step Adversarial Loss
                d_fake_cycle_A = self.discriminator_A2(cycle_A)
                d_fake_cycle_B = self.discriminator_B2(cycle_B)

                # Generator Cycle Loss
                cycleLoss = flow.mean(flow.abs(real_A - cycle_A)) + flow.mean(
                    flow.abs(real_B - cycle_B))

                # Generator Identity Loss
                identityLoss = flow.mean(
                    flow.abs(real_A - identity_A)) + flow.mean(
                        flow.abs(real_B - identity_B))

                # Generator Loss: push discriminator outputs on fakes to 1.
                g_loss_A2B = flow.mean((1 - d_fake_B)**2)
                g_loss_B2A = flow.mean((1 - d_fake_A)**2)

                # Generator Two Step Adversarial Loss
                generator_loss_A2B_2nd = flow.mean((1 - d_fake_cycle_B)**2)
                generator_loss_B2A_2nd = flow.mean((1 - d_fake_cycle_A)**2)

                # Total Generator Loss
                g_loss = (g_loss_A2B + g_loss_B2A + generator_loss_A2B_2nd +
                          generator_loss_B2A_2nd +
                          self.cycle_loss_lambda * cycleLoss +
                          self.identity_loss_lambda * identityLoss)

                # Backprop for Generator
                self.reset_grad()
                g_loss.backward()
                self.generator_optimizer.step()

                # Train Discriminator
                self.generator_A2B.eval()
                self.generator_B2A.eval()
                self.discriminator_A.train()
                self.discriminator_B.train()
                self.discriminator_A2.train()
                self.discriminator_B2.train()

                # Discriminator Feed Forward
                d_real_A = self.discriminator_A(real_A)
                d_real_B = self.discriminator_B(real_B)
                d_real_A2 = self.discriminator_A2(real_A)
                d_real_B2 = self.discriminator_B2(real_B)
                generated_A = self.generator_B2A(real_B, mask_B)
                d_fake_A = self.discriminator_A(generated_A)

                # For Two Step Adversarial Loss A->B
                cycled_B = self.generator_A2B(generated_A,
                                              flow.ones_like(generated_A))
                d_cycled_B = self.discriminator_B2(cycled_B)

                generated_B = self.generator_A2B(real_A, mask_A)
                d_fake_B = self.discriminator_B(generated_B)

                # For Two Step Adversarial Loss B->A
                cycled_A = self.generator_B2A(generated_B,
                                              flow.ones_like(generated_B))
                d_cycled_A = self.discriminator_A2(cycled_A)

                # Loss Functions: real outputs towards 1, fake towards 0.
                d_loss_A_real = flow.mean((1 - d_real_A)**2)
                d_loss_A_fake = flow.mean((0 - d_fake_A)**2)
                d_loss_A = (d_loss_A_real + d_loss_A_fake) / 2.0

                d_loss_B_real = flow.mean((1 - d_real_B)**2)
                d_loss_B_fake = flow.mean((0 - d_fake_B)**2)
                d_loss_B = (d_loss_B_real + d_loss_B_fake) / 2.0

                # Two Step Adversarial Loss
                d_loss_A_cycled = flow.mean((0 - d_cycled_A)**2)
                d_loss_B_cycled = flow.mean((0 - d_cycled_B)**2)
                d_loss_A2_real = flow.mean((1 - d_real_A2)**2)
                d_loss_B2_real = flow.mean((1 - d_real_B2)**2)
                d_loss_A_2nd = (d_loss_A2_real + d_loss_A_cycled) / 2.0
                d_loss_B_2nd = (d_loss_B2_real + d_loss_B_cycled) / 2.0

                # Final Loss for discriminator with the Two Step Adversarial Loss
                d_loss = (d_loss_A + d_loss_B) / 2.0 + (d_loss_A_2nd +
                                                        d_loss_B_2nd) / 2.0

                # Backprop for Discriminator
                self.reset_grad()
                d_loss.backward()
                self.discriminator_optimizer.step()

                # Log every second batch. All losses are converted with
                # .item() so every value is formatted as a Python number.
                if (i + 1) % 2 == 0:
                    print(
                        "Iter:{} Generator Loss:{:.4f} Discrimator Loss:{:.4f} GA2B:{:.4f} GB2A:{:.4f} G_id:{:.4f} G_cyc:{:.4f} D_A:{:.4f} D_B:{:.4f}"
                        .format(
                            num_iterations,
                            g_loss.item(),
                            d_loss.item(),
                            g_loss_A2B.item(),
                            g_loss_B2A.item(),
                            identityLoss.item(),
                            cycleLoss.item(),
                            d_loss_A.item(),
                            d_loss_B.item(),
                        ))

            # Save each model checkpoint and validation
            if epoch % self.epochs_per_save == 0 and epoch != 0:
                self.saveModelCheckPoint(epoch, PATH="model_checkpoint")
                self.validation_for_A_dir()
                self.validation_for_B_dir()