import torch
import torchaudio


def read_aud_sim(self, fname):
    # preprocess() (defined elsewhere in this module) is expected to load the
    # file as mono audio and apply voice activity detection; a hedged sketch
    # of it follows this function below.
    aud = preprocess(fname)
    # 16 kHz is torchaudio's default sample rate and matches the rate that
    # preprocess() is assumed to emit. Building the transform on every call
    # works, but it could also be constructed once and cached on self.
    mel = torchaudio.transforms.MelSpectrogram(
        sample_rate=16000,
        n_mels=self.config['MEL_CHANNELS'],
        n_fft=1024,
        hop_length=256,
        win_length=1024)(torch.as_tensor(aud, dtype=torch.float32))
    return mel
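
# A minimal sketch of the `preprocess` helper used above, reconstructed from
# the webrtcvad/librosa calls left commented out in the original source. The
# 16 kHz target rate, the 30 ms frame size, and the name _preprocess_sketch
# are assumptions, not values confirmed by this file.
def _preprocess_sketch(fname, sr=16000, vad_mode=3):
    import librosa
    import numpy as np
    import webrtcvad

    # Load as mono; the original commented call used librosa.load(..., mono=True).
    aud, _ = librosa.load(fname, sr=sr, mono=True)
    # In the original, the aggressiveness came from self.config['VAD_MODE'].
    vad = webrtcvad.Vad(vad_mode)
    frame_len = int(sr * 0.03)  # webrtcvad accepts 10/20/30 ms frames
    voiced = []
    for start in range(0, len(aud) - frame_len + 1, frame_len):
        frame = aud[start:start + frame_len]
        # webrtcvad expects 16-bit little-endian mono PCM bytes
        pcm = (frame * 32767).astype(np.int16).tobytes()
        if vad.is_speech(pcm, sr):
            voiced.append(frame)
    # Keep only voiced frames; fall back to the raw signal if none were found.
    return np.concatenate(voiced) if voiced else aud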

def infer(self, fname, cpt):
    # `cpt` (checkpoint path) is accepted but currently unused; `encoder` is
    # assumed to be a module-level speaker encoder loaded elsewhere.
    aud = preprocess(fname)
    embeds = encoder.embed(aud)
    return embeds
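
# Usage sketch. The owning class is not shown in this file; the class name
# and config value below are illustrative only:
#
#     pipe = VoicePipeline(config={'MEL_CHANNELS': 80})
#     mel = pipe.read_aud_sim('sample.wav')         # FloatTensor (n_mels, frames)
#     emb = pipe.infer('sample.wav', cpt='enc.pt')  # speaker embedding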