def wav_loader(path):
    """Load the features for ``path`` as a fixed-length, normalised array.

    The feature matrix returned by ``audio_processing.mk_MFB(path)`` is
    brought to exactly 300 rows (frames):

    * 300 rows or fewer: the rows are repeated cyclically (wrapping back to
      the first row) until 300 rows are filled, and stored in a float
      ``(300, 64)`` buffer.  Clips shorter than 150 rows used to fail here
      because a single wrap-around copy could not fill the remaining rows.
    * more than 300 rows: a random contiguous window of 300 rows is taken,
      using NumPy's global random state.  Every valid start offset,
      including the last one, can be drawn.

    The result is then normalised with the global mean and standard
    deviation of the selected window; the standard deviation is floored at
    0.001 so a constant input does not divide by zero.

    Args:
        path: Passed unchanged to ``audio_processing.mk_MFB``.

    Returns:
        A 2-D NumPy array with 300 rows.  The padding path assumes the
        feature matrix has 64 columns (the size of the buffer it fills).

    Raises:
        ValueError: If the feature matrix has no rows.
    """
    n_frames = 300
    n_bins = 64

    feats = audio_processing.mk_MFB(path)
    length = feats.shape[0]
    if length == 0:
        raise ValueError("no feature frames extracted from %r" % (path,))

    if length <= n_frames:
        # Wrap around as many times as needed: output row i is input row
        # i % length.  For length >= 150 this matches a single wrap-around
        # copy; for shorter clips it keeps cycling instead of failing.
        npys = np.zeros((n_frames, n_bins))
        npys[:, :] = feats[np.arange(n_frames) % length, :]
    else:
        # randint's upper bound is exclusive, so +1 makes the final window
        # (start == length - n_frames) reachable.
        start = np.random.randint(length - n_frames + 1)
        npys = feats[start:start + n_frames, :]

    mu = np.average(npys)
    sigma = np.std(npys)
    return (npys - mu) / max(sigma, 0.001)
def forward(self, anchor, positive, negative):
    """Return the mean hinge loss over a batch of triplets.

    Computes ``clamp(margin + d(anchor, positive) - d(anchor, negative),
    min=0)`` with ``self.pdist`` as the distance and ``self.margin`` as the
    margin, then averages the result with ``torch.mean``.

    NOTE(review): this reads ``self.pdist`` and ``self.margin``, so it is a
    method of a class whose header is not visible in this excerpt.
    """
    pos_dist = self.pdist.forward(anchor, positive)
    neg_dist = self.pdist.forward(anchor, negative)
    hinge = torch.clamp(self.margin + pos_dist - neg_dist, min=0.0)
    return torch.mean(hinge)


# Extra keyword arguments used only when CUDA is enabled
# (presumably forwarded to the data loaders -- confirm against the caller).
if args.cuda:
    kwargs = {'num_workers': 0, 'pin_memory': True}
else:
    kwargs = {}

l2_dist = PairwiseDistance(2)

voxceleb = read_voxceleb_structure(args.dataroot)

# Optional pre-processing pass: run mk_MFB on every wav file listed in the
# dataset structure.
if args.makemfb:
    for datum in voxceleb.iterrows():
        # iterrows() yields (index, row) pairs; the row carries 'filename'.
        mk_MFB(
            args.dataroot + '/voxceleb1_wav/' + datum[1]['filename'] + '.wav'
        )

# Choose the input pipeline: either start from MFB features via read_MFB, or
# start from audio via read_audio and convert with toMFB() in the transform.
if args.mfb:
    transform = transforms.Compose([
        truncatedinputfromMFB(),
        totensor(),
    ])
    file_loader = read_MFB
else:
    # tonormal() is intentionally not part of this pipeline.
    transform = transforms.Compose([
        truncatedinput(),
        toMFB(),
        totensor(),
    ])
    file_loader = read_audio