def create_dataset(speechfolder, peaksfolder, window, stride, file_slice=slice(0, 10)):
    """Build windowed speech data plus per-window peak labels from ``.npy`` files.

    The ``*.npy`` files of each folder are listed in sorted order, narrowed
    with ``file_slice``, loaded and concatenated. Speech, peaks and the
    running sample positions are then each passed through ``strided_app``
    (defined elsewhere; presumably it yields one row per window -- confirm).

    NOTE(review): a second function with this same name but a three-value
    return appears later in this source; if both live in one module the
    later definition shadows this one.

    Args:
        speechfolder: Directory searched for speech ``*.npy`` files.
        peaksfolder: Directory searched for peak ``*.npy`` files.
        window: Forwarded to ``strided_app``.
        stride: Forwarded to ``strided_app``.
        file_slice: Slice applied to each sorted file listing.

    Returns:
        Tuple ``(speech_windowed_data, peak_distance, peak_indicator,
        indices, peak_data)``. ``peak_distance`` holds, per window, the
        position of the first nonzero peak entry or ``-1`` when there is
        none; ``peak_indicator`` is ``1.0`` where a peak exists, else ``0.0``;
        ``peak_data`` is the concatenated, un-windowed peak array.
    """
    pattern = '*.npy'
    speech_paths = sorted(glob(os.path.join(speechfolder, pattern)))[file_slice]
    peak_paths = sorted(glob(os.path.join(peaksfolder, pattern)))[file_slice]

    speech_arrays = list(map(np.load, speech_paths))
    peak_arrays = list(map(np.load, peak_paths))
    speech_signal = np.concatenate(speech_arrays)
    peak_data = np.concatenate(peak_arrays)

    sample_positions = np.arange(len(speech_signal))

    speech_windows = strided_app(speech_signal, window, stride)
    peak_windows = strided_app(peak_data, window, stride)
    index_windows = strided_app(sample_positions, window, stride)

    # Position of the first nonzero entry in each window; -1 marks "no peak".
    first_hits = []
    for row in peak_windows:
        hits = np.nonzero(row)[0]
        if len(hits) != 0:
            first_hits.append(hits[0])
        else:
            first_hits.append(-1)
    peak_distance = np.array(first_hits)

    has_peak = peak_distance != -1
    peak_indicator = has_peak * 1.0

    return speech_windows, peak_distance, peak_indicator, index_windows, peak_data
def custom_loader(speechfolder, eggfolder, window, stride, select=None):
    """Load paired speech/egg ``.npy`` files and return them windowed.

    Files are paired by their position in the sorted ``*.npy`` listing of
    each folder. Each loaded array is divided by its own peak absolute
    value, the arrays of each kind are concatenated, and the results are
    passed through ``strided_app`` (defined elsewhere).

    Args:
        speechfolder: Directory searched for speech ``*.npy`` files.
        eggfolder: Directory searched for egg ``*.npy`` files.
        window: Forwarded to ``strided_app``.
        stride: Forwarded to ``strided_app``.
        select: When not ``None``, keep only the first ``select`` entries of
            a random permutation of the file positions (the same positions
            are used for both listings).

    Returns:
        Tuple ``(speech_windowed_data, egg_windowed_data)``.
    """
    speechfiles = sorted(glob(os.path.join(speechfolder, "*.npy")))
    eggfiles = sorted(glob(os.path.join(eggfolder, "*.npy")))

    if select is not None:
        ind = np.random.permutation(len(speechfiles))[:select]
        speechfiles = [speechfiles[i] for i in ind]
        eggfiles = [eggfiles[i] for i in ind]
        print("Selected {} files".format(select))

    def _peak_normalize(signal):
        # Scale so the largest magnitude becomes 1.
        peak = np.max(np.abs(signal))
        # An all-zero array has peak 0; dividing by it would turn the whole
        # array into NaN and contaminate the concatenated data, so such an
        # array is left as zeros (dividing by 1 keeps the result dtype the
        # same as the normal path).
        return signal / (peak if peak != 0 else 1)

    speech_data = [np.load(f) for f in speechfiles]
    egg_data = [np.load(f) for f in eggfiles]

    egg_data = np.concatenate([_peak_normalize(x) for x in egg_data])
    speech_data = np.concatenate([_peak_normalize(x) for x in speech_data])

    speech_windowed_data = strided_app(speech_data, window, stride)
    egg_windowed_data = strided_app(egg_data, window, stride)

    return speech_windowed_data, egg_windowed_data
def _create_dataset(speechfiles, peakfiles, window, stride):
    """Window already-listed speech/peak files and derive per-window peak labels.

    Every path in ``speechfiles`` and ``peakfiles`` is loaded with
    ``np.load``; the arrays of each kind are concatenated and passed through
    ``strided_app`` (defined elsewhere; presumably it yields one row per
    window -- confirm).

    Args:
        speechfiles: Iterable of paths to speech ``.npy`` files.
        peakfiles: Iterable of paths to peak ``.npy`` files.
        window: Forwarded to ``strided_app``.
        stride: Forwarded to ``strided_app``.

    Returns:
        Tuple ``(speech_windowed_data, peak_distance, peak_indicator)``.
        ``peak_distance`` holds, per window, the position of the first
        nonzero peak entry or ``-1`` when there is none; ``peak_indicator``
        is ``1.0`` where a peak exists and ``0.0`` otherwise.
    """
    def first_nonzero_or_minus_one(row):
        # Look the nonzero positions up once and reuse them.
        positions = np.nonzero(row)[0]
        return -1 if len(positions) == 0 else positions[0]

    loaded_speech = [np.load(path) for path in speechfiles]
    loaded_peaks = [np.load(path) for path in peakfiles]

    speech_signal = np.concatenate(loaded_speech)
    peak_signal = np.concatenate(loaded_peaks)

    speech_windows = strided_app(speech_signal, window, stride)
    peak_windows = strided_app(peak_signal, window, stride)

    peak_distance = np.array(
        [first_nonzero_or_minus_one(row) for row in peak_windows]
    )
    found = peak_distance != -1
    peak_indicator = found * 1.0

    return speech_windows, peak_distance, peak_indicator
def create_dataset(speechfolder, peaksfolder, window, stride, file_slice=slice(0, 10)):
    """Return windowed speech data, windowed sample indices and raw peak data.

    The ``*.npy`` files of each folder are listed in sorted order, narrowed
    with ``file_slice``, loaded and concatenated. The speech signal and its
    running sample positions are passed through ``strided_app`` (defined
    elsewhere); the peak data is returned un-windowed.

    NOTE(review): another function with this same name but a five-value
    return appears earlier in this source; if both live in one module this
    later definition is the one callers get.

    Args:
        speechfolder: Directory searched for speech ``*.npy`` files.
        peaksfolder: Directory searched for peak ``*.npy`` files.
        window: Forwarded to ``strided_app``.
        stride: Forwarded to ``strided_app``.
        file_slice: Slice applied to each sorted file listing.

    Returns:
        Tuple ``(speech_windowed_data, indices, peak_data)`` where
        ``indices`` is ``np.arange(len(speech_data))`` passed through
        ``strided_app`` with the same ``window`` and ``stride``.
    """
    speechfiles = sorted(glob(os.path.join(speechfolder, '*.npy')))[file_slice]
    peakfiles = sorted(glob(os.path.join(peaksfolder, '*.npy')))[file_slice]

    speech_data = [np.load(f) for f in speechfiles]
    peak_data = [np.load(f) for f in peakfiles]

    speech_data = np.concatenate(speech_data)
    peak_data = np.concatenate(peak_data)

    # Only the speech signal and its sample positions are windowed; the peak
    # array is handed back whole, so no windowed copy of it is built.
    speech_windowed_data = strided_app(speech_data, window, stride)
    indices = strided_app(np.arange(len(speech_data)), window, stride)

    return speech_windowed_data, indices, peak_data
def update_lists(self):
    """Rebuild ``self.example_list`` from freshly shuffled per-class files.

    For every key of ``self.class2file`` the associated files are shuffled
    with ``np.random.permutation`` and their positions are grouped by
    ``strided_app(..., 4, 4)`` (defined elsewhere). Each group of exactly
    four positions becomes one entry of the form
    ``[file, file, file, file, clss, self.clss2label[clss]]``.
    """
    group_size = 4
    self.example_list = []

    for clss in self.class2file:
        shuffled_files = np.random.permutation(self.class2file[clss])
        positions = np.arange(len(shuffled_files))

        for group in strided_app(positions, group_size, group_size):
            # Skip any group that does not hold a full set of four files.
            if len(group) != group_size:
                continue

            row = [shuffled_files[pos] for pos in group]
            self.example_list.append(row)
            # Tag the four files with their class key and its label.
            row.append(clss)
            row.append(self.clss2label[clss])