Example #1
0
 def infer(self, x, chunk_length=None, chunk_overlap=0):
     """Synthesize audio with nv-wavenet, optionally chunk by chunk.

     ``x`` is converted by ``self.get_cond_input``. The result is split
     along its last axis into overlapping chunks, each chunk is run through
     ``self.nv_wavenet.infer`` and mu-law decoded, and the decoded chunks
     are concatenated along the last axis.

     Args:
         x: Input handed to ``self.get_cond_input``.
         chunk_length: Upper bound on the chunk size along the last axis of
             the conditioning input. ``None`` (or a value not smaller than
             the input length) processes everything in a single chunk.
         chunk_overlap: Number of positions shared by consecutive chunks.
             The first ``chunk_overlap`` output positions of every chunk
             but the first are discarded.

     Returns:
         The decoded audio. If ``self.fading`` is set, the leading
         ``upsamp_window - upsamp_stride`` positions (half of that for
         ``'half'``) are cut off.

     Raises:
         NotImplementedError: If the conditioning input is on the CPU.
         ValueError: If chunking is needed and ``chunk_overlap`` is not in
             ``[0, chunk_length)``.
     """
     with torch.no_grad():
         x = self.get_cond_input(x)
         # A ``torch.device`` does not compare equal to the plain string
         # 'cpu', so test the device type. All chunks are slices of ``x``
         # and share its device, hence one check up front is sufficient.
         if x.device.type == 'cpu':
             raise NotImplementedError
         from .nv_wavenet.nv_wavenet import Impl

         length = x.shape[-1]
         if chunk_length is None or length <= chunk_length:
             chunks = [x]
         else:
             if not 0 <= chunk_overlap < chunk_length:
                 raise ValueError(
                     'chunk_overlap must satisfy '
                     '0 <= chunk_overlap < chunk_length, got '
                     f'chunk_overlap={chunk_overlap} and '
                     f'chunk_length={chunk_length}.')
             # Number of chunks needed at the requested maximum size.
             n = math.ceil(
                 (length - chunk_overlap) / (chunk_length - chunk_overlap))
             # Shrink the chunk size so the n chunks are roughly balanced.
             chunk_length = math.ceil(length / n) + chunk_overlap
             step = chunk_length - chunk_overlap
             chunks = [
                 x[..., onset:onset + chunk_length]
                 for onset in range(0, length - chunk_overlap, step)
             ]
         audio = []
         for i, xi in enumerate(chunks):
             xi = self.nv_wavenet.infer(xi, Impl.AUTO)
             xi = mu_law_decode(xi, self.n_out_channels)
             if i > 0:
                 # Drop the part that repeats the end of the previous chunk.
                 xi = xi[..., chunk_overlap:]
             audio.append(xi)
         audio = torch.cat(audio, dim=-1)
         if self.fading is not None:
             assert self.fading in ['half', 'full']
             pad_width = self.upsamp_window - self.upsamp_stride
             if self.fading == 'half':
                 pad_width //= 2
             audio = audio[..., pad_width:]
         return audio
Example #2
0
 def infer(self, x, chunk_length=None, chunk_overlap=0):
     """Synthesize audio with nv-wavenet, optionally chunk by chunk.

     ``x`` is converted by ``self.get_cond_input`` and rearranged into the
     layout expected by nv-wavenet. The result is split along its last axis
     into overlapping chunks, each chunk is run through
     ``self.nv_wavenet.infer`` and mu-law decoded, and the decoded chunks
     are concatenated along the last axis.

     Args:
         x: Input handed to ``self.get_cond_input``.
         chunk_length: Upper bound on the chunk size along the last axis of
             the conditioning input. ``None`` (or a value not smaller than
             the input length) processes everything in a single chunk.
         chunk_overlap: Number of positions shared by consecutive chunks.
             The first ``chunk_overlap`` output positions of every chunk
             but the first are discarded.

     Returns:
         The decoded audio of all chunks joined along the last axis.

     Raises:
         NotImplementedError: If the conditioning input is on the CPU.
         ValueError: If chunking is needed and ``chunk_overlap`` is not in
             ``[0, chunk_length)``.
     """
     with torch.no_grad():
         x = self.get_cond_input(x)
         # A ``torch.device`` does not compare equal to the plain string
         # 'cpu', so test the device type. All chunks are slices of ``x``
         # and share its device, hence one check up front is sufficient.
         if x.device.type == 'cpu':
             raise NotImplementedError
         from .nv_wavenet.nv_wavenet import Impl

         x = x.view(x.size(0), self.n_layers, -1, x.size(2))
         # This makes the data channels x batch x num_layers x samples
         x = x.permute(2, 0, 1, 3)
         length = x.shape[-1]
         if chunk_length is None or length <= chunk_length:
             chunks = [x]
         else:
             if not 0 <= chunk_overlap < chunk_length:
                 raise ValueError(
                     'chunk_overlap must satisfy '
                     '0 <= chunk_overlap < chunk_length, got '
                     f'chunk_overlap={chunk_overlap} and '
                     f'chunk_length={chunk_length}.')
             # Number of chunks needed at the requested maximum size.
             n = math.ceil(
                 (length - chunk_overlap) / (chunk_length - chunk_overlap))
             # Shrink the chunk size so the n chunks are roughly balanced.
             chunk_length = math.ceil(length / n) + chunk_overlap
             step = chunk_length - chunk_overlap
             chunks = [
                 x[..., onset:onset + chunk_length]
                 for onset in range(0, length - chunk_overlap, step)
             ]
         audio = []
         for i, xi in enumerate(chunks):
             xi = self.nv_wavenet.infer(xi, Impl.AUTO)
             # Wait for the kernel on this device before decoding.
             torch.cuda.synchronize(xi.device)
             xi = mu_law_decode(xi, self.n_out_channels)
             if i > 0:
                 # Drop the part that repeats the end of the previous chunk.
                 xi = xi[..., chunk_overlap:]
             audio.append(xi)
         audio = torch.cat(audio, dim=-1)
         return audio
Example #3
0
 def review(self, inputs, outputs):
     """Assemble the summary dict (loss, histograms, audio) for one step.

     Args:
         inputs: Mapping that holds the reference audio under
             ``self.audio_key``; its first entry is reported as target.
         outputs: Pair ``(predictions, targets)`` as consumed by
             ``torch.nn.CrossEntropyLoss``.

     Returns:
         A dict with the keys ``loss`` (mean cross entropy), ``scalars``,
         ``histograms`` (element-wise cross entropy), ``audios`` (target
         and decoded prediction of the first example, each paired with
         ``self.sample_rate``) and ``images``.
     """
     predictions, targets = outputs
     criterion = torch.nn.CrossEntropyLoss(reduction='none')
     ce = criterion(predictions, targets)

     # Only the first example is turned into audio for the summary.
     target_audio = inputs[self.audio_key][0]
     decoded_audio = mu_law_decode(
         torch.argmax(predictions[0], dim=0),
         mu_quantization=self.wavenet.n_out_channels,
     )

     return {
         'loss': ce.mean(),
         'scalars': {},
         'histograms': {'reconstruction_ce': ce},
         'audios': {
             'target': (target_audio, self.sample_rate),
             'decode': (decoded_audio, self.sample_rate),
         },
         'images': {},
     }
Example #4
0
 def infer_gpu(self, x, device=0):
     """Run nv-wavenet inference on a CUDA device and move the model back.

     The model is moved to ``device`` for the duration of the inference and
     moved to the CPU again afterwards, also when the inference fails.

     Args:
         x: Input handed to ``self.get_cond_input`` after being moved to
             ``device``.
         device: CUDA device passed to ``self.cuda`` and ``x.cuda``.

     Returns:
         The mu-law decoded output of ``self.nv_wavenet.infer``. If
         ``self.fading`` is set, the leading
         ``upsamp_window - upsamp_stride`` positions (half of that for
         ``'half'``) are cut off.
     """
     self.cuda(device)
     try:
         from .nv_wavenet.nv_wavenet import Impl
         with torch.no_grad():
             cond_input = self.get_cond_input(x.cuda(device))
             audio = self.nv_wavenet.infer(cond_input, Impl.AUTO)
             audio = mu_law_decode(audio, self.n_out_channels)
     finally:
         # Do not leave the model on the GPU if anything above raises.
         self.cpu()
     if self.fading is not None:
         assert self.fading in ['half', 'full']
         pad_width = self.upsamp_window - self.upsamp_stride
         if self.fading == 'half':
             pad_width //= 2
         audio = audio[..., pad_width:]
     return audio
Example #5
0
 def infer_gpu(self, x):
     """Run nv-wavenet inference on the default CUDA device.

     Moves the model and ``x`` to CUDA, builds the conditioning input with
     ``self.get_cond_input``, lets ``self.nv_wavenet`` generate the output
     and returns it mu-law decoded. The model stays on the GPU afterwards.
     """
     self.cuda()
     from .nv_wavenet.nv_wavenet import Impl

     conditioning = self.get_cond_input(x.cuda())
     quantized = self.nv_wavenet.infer(conditioning, Impl.AUTO)
     decoded = mu_law_decode(quantized, self.n_out_channels)
     return decoded