def prep_feats(data_, seg_len=300, delta=False):
    """Cut a feature matrix into fixed-length windows stacked in a float tensor.

    ``data_`` is transposed first, and axis 1 of the transposed array is
    treated as the frame axis (presumably ``data_`` is ``[T, ncoef]`` --
    TODO confirm against callers).  Inputs with fewer than ``seg_len`` frames
    are repeated along the frame axis and truncated to exactly ``seg_len``.

    Args:
        data_: 2-D NumPy array of features.
        seg_len: Number of frames in every window.
        delta: If true, the outputs of ``delta_`` with ``order=1`` and
            ``order=2`` are concatenated to the features along a new leading
            axis.

    Returns:
        A float tensor obtained by concatenating, along dimension 0, one
        slice of the features per entry yielded by ``strided_app``.
    """
    features = data_.T
    n_frames = features.shape[1]

    if n_frames < seg_len:
        # Repeat short inputs along the frame axis until at least seg_len
        # frames exist, then trim to exactly seg_len.
        mul = int(np.ceil(seg_len / n_frames))
        features = np.tile(features, (1, mul))[:, :seg_len]
        n_frames = features.shape[1]

    # Hop between windows: at most half a window, never less than one frame.
    hop = max(min(seg_len // 2, abs(seg_len - (n_frames + 1))), 1)
    # NOTE(review): strided_app is defined elsewhere; presumably it returns
    # one row of frame indices per window -- confirm.
    idxs = strided_app(np.arange(n_frames), seg_len, hop)

    features = features[np.newaxis, :, :]

    if delta:
        features = np.concatenate(
            [
                features,
                delta_(features, width=3, order=1),
                delta_(features, width=3, order=2),
            ],
            axis=0,
        )

    windows = [
        torch.from_numpy(features[np.newaxis, :, :, idx]).float()
        for idx in idxs
    ]
    return torch.cat(windows, 0)
def prep_feats(data_, max_dur, max_nchunks, delta=False):
    """Turn a feature matrix into a batch of chunks as a float tensor.

    ``data_`` is documented as ``[T, ncoef]``.  It is transposed and given
    two leading singleton axes, so the frame axis becomes the last one.

    * Fewer than 50 frames: the features are repeated along the frame axis
      and truncated to exactly 50 frames.
    * More than ``max_dur`` frames: the features are sliced with the index
      sets returned by ``strided_app`` (window ``max_dur``, hop
      ``max_dur // 2``) and the slices are stacked along axis 0.
    * Otherwise the features are used as they are.

    Args:
        data_: NumPy array of features, ``[T, ncoef]``.
        max_dur: Frame count above which the input is split into chunks.
        max_nchunks: Upper bound on the number of chunks returned.
        delta: If true, the outputs of ``delta_`` with ``order=1`` and
            ``order=2`` are concatenated to the features along axis 1.

    Returns:
        A float tensor holding at most ``max_nchunks`` chunks along
        dimension 0.
    """
    min_frames = 50

    features = (data_.T)[np.newaxis, np.newaxis, :, :]
    n_frames = features.shape[-1]

    if n_frames < min_frames:
        mul = int(np.ceil(min_frames / n_frames))
        features = np.tile(features, (1, 1, 1, mul))[..., :min_frames]
    elif n_frames > max_dur:
        # NOTE(review): strided_app is defined elsewhere; presumably it
        # returns one row of frame indices per chunk -- confirm.
        idxs = strided_app(np.arange(n_frames), max_dur, max_dur // 2)
        features = np.concatenate(
            [features[..., idx] for idx in idxs], axis=0)

    if delta:
        features = np.concatenate(
            [
                features,
                delta_(features, width=3, order=1),
                delta_(features, width=3, order=2),
            ],
            axis=1,
        )

    n_chunks = min(features.shape[0], max_nchunks)
    return torch.from_numpy(features).float()[:n_chunks, ...]
def prep_feats(data_, delta=False):
    """Convert a feature matrix into a 4-D float tensor.

    ``data_`` is transposed, so axis 1 of the transposed array is treated as
    the frame axis (presumably ``data_`` is ``[T, ncoef]`` -- TODO confirm).
    Inputs with fewer than 50 frames are repeated along that axis and
    truncated to exactly 50 frames.

    Args:
        data_: 2-D NumPy array of features.
        delta: If true, the outputs of ``delta_`` with ``order=1`` and
            ``order=2`` are concatenated to the features along the leading
            axis added before the call.

    Returns:
        A float tensor with two axes prepended to the (possibly tiled)
        transposed features.
    """
    min_frames = 50

    feats = data_.T
    n_frames = feats.shape[1]
    if n_frames < min_frames:
        repeats = int(np.ceil(min_frames / n_frames))
        feats = np.tile(feats, (1, repeats))[:, :min_frames]

    feats = feats[np.newaxis, :, :]

    if delta:
        first_order = delta_(feats, width=3, order=1)
        second_order = delta_(feats, width=3, order=2)
        feats = np.concatenate([feats, first_order, second_order], axis=0)

    batched = feats[np.newaxis, :, :, :]
    return torch.from_numpy(batched).float()
def prep_utterance(self, data):
    """Crop or tile an utterance to exactly ``self.max_nb_frames`` frames.

    The last axis of ``data`` is the frame axis.  Inputs longer than
    ``self.max_nb_frames`` are cropped to a randomly placed contiguous
    window; all other inputs are repeated along the frame axis and
    truncated.

    Args:
        data: NumPy array; it is sliced on its third axis, so it is expected
            to be 3-D.

    Returns:
        A contiguous float tensor whose last dimension is
        ``self.max_nb_frames``.  When ``self.delta`` is true, the outputs of
        ``delta_`` with ``order=1`` and ``order=2`` are concatenated along
        axis 0 before conversion.
    """
    max_frames = self.max_nb_frames
    n_frames = data.shape[-1]

    if n_frames > max_frames:
        # np.random.randint excludes its upper bound, so "+ 1" is required
        # for the last valid start offset (and thus the final frame) to be
        # reachable.
        start = np.random.randint(0, n_frames - max_frames + 1)
        data_ = data[:, :, start:start + max_frames]
    else:
        mul = int(np.ceil(max_frames / n_frames))
        data_ = np.tile(data, (1, 1, mul))[:, :, :max_frames]

    if self.delta:
        data_ = np.concatenate(
            [
                data_,
                delta_(data_, width=3, order=1),
                delta_(data_, width=3, order=2),
            ],
            axis=0,
        )

    return torch.from_numpy(data_).float().contiguous()
def prep_utterance(self, data):
    """Crop or tile an utterance to exactly ``self.max_nb_frames`` frames.

    Axis 2 of ``data`` is the frame axis.  Inputs longer than
    ``self.max_nb_frames`` are cropped to a randomly placed contiguous
    window; all other inputs are repeated along the frame axis and
    truncated.

    Args:
        data: 3-D NumPy array with frames on axis 2.

    Returns:
        NumPy array whose axis 2 has length ``self.max_nb_frames``.  When
        ``self.delta`` is true, the outputs of ``delta_`` with ``order=1``
        and ``order=2`` are concatenated along axis 0.
    """
    max_frames = self.max_nb_frames
    n_frames = data.shape[2]

    if n_frames > max_frames:
        # np.random.randint excludes its upper bound, so "+ 1" is required
        # for the last valid start offset (and thus the final frame) to be
        # reachable.
        start = np.random.randint(0, n_frames - max_frames + 1)
        data_ = data[:, :, start:start + max_frames]
    else:
        # The repeat count must come from the frame axis; deriving it from
        # axis 0 can leave the tiled array shorter than max_frames.
        mul = int(np.ceil(max_frames / n_frames))
        data_ = np.tile(data, (1, 1, mul))[:, :, :max_frames]

    if self.delta:
        data_ = np.concatenate(
            [
                data_,
                delta_(data_, width=3, order=1),
                delta_(data_, width=3, order=2),
            ],
            axis=0,
        )

    return data_