def get_sentence_data(self, speaker, sid, spec=False):
    """Load one sentence as audio (or a log spectrogram) plus its annotations.

    Args:
        speaker: Key into ``self.speakers``; also the tail of the speaker
            folder name (``<sex><speaker>``).
        sid: Sentence id, i.e. the file stem shared by the ``.WAV``, ``.PHN``
            and ``.WRD`` files.
        spec: When true, return a spectrogram instead of the raw result of
            ``self._wav``.

    Returns:
        ``(data, words, phones)`` where ``words`` and ``phones`` are whatever
        ``self._wrd`` and ``self._phn`` return for the annotation files.
    """
    # Function-scope import: numpy is only needed for the spectrogram cache.
    import numpy as np

    spkr = self.speakers[speaker]
    part = "TRAIN" if spkr["use"] == "TRN" else "TEST"
    folder = os.path.join(
        self.full_path, part, "DR" + spkr["dr"], spkr["sex"] + speaker)

    phn_file = os.path.join(folder, sid + ".PHN")
    wrd_file = os.path.join(folder, sid + ".WRD")
    sph_wav_file = os.path.join(folder, sid + ".WAV")
    wav_file = os.path.join(folder, sid + ".REALWAV")

    # Because the TIMIT dataset uses the NIST SPHERE header we first convert
    # it into a standard WAV if we have not already.
    if not os.path.exists(wav_file):
        SPHFile(sph_wav_file).write_wav(wav_file)

    if not spec:
        data = self._wav(wav_file)
    else:
        spec_ext = "_{}_{}_{}".format(
            self.fft_size, self.window_size, self.thresh)
        cache_file = os.path.join(folder, sid + ".SPEC" + spec_ext) + ".npy"
        if os.path.exists(cache_file):
            # Previously this path returned ``None`` because the load was
            # commented out while the existence check was still active.
            data = np.load(cache_file)
        else:
            # presumably _wav returns (sample_rate, samples) - TODO confirm
            wav = self._wav(wav_file)[1]
            data = spectrogram(
                wav.astype('float64'),
                fft_size=self.fft_size * 2,
                step_size=self.window_size,
                log=True,
                thresh=self.thresh,
            )
            np.save(cache_file, data)

    return data, self._wrd(wrd_file), self._phn(phn_file)
def wav_converter(self):
    """Convert every ``*.WAV`` SPHERE file under ``self.dataset_path``.

    The tree is walked as ``<dataset_path>/<dialect>/<speaker>/*.WAV``. For
    each audio file the sibling ``.TXT`` file is read; the first two fields
    of every line are taken as start/end offsets, divided by 16000, and passed
    to ``SPHFile.write_wav`` together with the ``.wav`` output name.
    """
    dialects_path = self.dataset_path
    for dialect in os.listdir(dialects_path):
        dialect_path = os.path.join(dialects_path, dialect)
        for speaker in os.listdir(path=dialect_path):
            speaker_path = os.path.join(dialect_path, speaker)
            for wav_file in glob2.glob(speaker_path + '/*.WAV'):
                sph = SPHFile(wav_file)
                txt_file = wav_file[:-3] + "TXT"
                # ``with`` guarantees the transcript handle is closed.
                with open(txt_file, 'r') as f:
                    for line in f:
                        words = line.split()
                        if len(words) < 2:
                            # Blank or malformed line: nothing to convert.
                            continue
                        # assumes offsets are sample indices at 16 kHz
                        start_time = int(words[0]) / 16000
                        end_time = int(words[1]) / 16000
                        print("writing file ", wav_file)
                        sph.write_wav(
                            wav_file.replace(".WAV", ".wav"),
                            start_time, end_time)
def process_wav(path, wav_file):
    """Compute MFCC and filter-bank features for one SPHERE-encoded file.

    The SPHERE file is converted to a temporary RIFF copy under ``TMP_PATH``,
    read back, and the copy is deleted again.

    Args:
        path: Directory containing ``wav_file``.
        wav_file: File name; must end in ``wav``.

    Returns:
        ``(mfccs, fbs_with_energy, wav, FS)`` where ``fbs_with_energy`` is the
        filter-bank matrix with the energy appended as a last column.

    Raises:
        AssertionError: If ``wav_file`` does not end in ``wav``.
    """
    assert wav_file[-3:] == 'wav', \
        'Wrong file name, should be a wav: {}'.format(wav_file)

    sphere_file_path = os.path.join(path, wav_file)
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(TMP_PATH, exist_ok=True)
    wav_copy_file_path = os.path.join(TMP_PATH, wav_file) + '_readable'

    try:
        SPHFile(sphere_file_path).write_wav(wav_copy_file_path)
        FS, wav = wavfile.read(wav_copy_file_path)
    finally:
        # Remove the temporary copy even when conversion or reading fails.
        if os.path.exists(wav_copy_file_path):
            os.remove(wav_copy_file_path)

    mfccs = mfcc(wav, FS, winstep=window_step_in_second,
                 winlen=window_size_in_second)
    fbs, energy = fbank(wav, FS, nfilt=n_filter_bank,
                        winstep=window_step_in_second,
                        winlen=window_size_in_second)
    fbs_with_energy = np.concatenate([fbs, energy[:, None]], axis=1)
    return mfccs, fbs_with_energy, wav, FS
def move_file_2_wav_n(self, remove_flag=False):
    """Copy all source files into ``self.targ_dir`` and convert them to WAV.

    With ``remove_flag`` false, the files listed by ``self.get_dir_frombase``
    for ``self.org_train`` and ``self.org_test`` are copied to
    ``<targ_dir>/<i>.wav`` and each copy is converted with ``SPHFile`` into
    ``<targ_dir>/<i><sub_add>.wav``. Nothing is done if ``targ_dir`` already
    contains files.

    With ``remove_flag`` true, the user is prompted and, on any non-empty
    answer, ``targ_dir`` is deleted.

    Args:
        remove_flag: Select the delete mode instead of the convert mode.
    """
    if remove_flag:
        a = input("the dir:{} will be remove".format(self.targ_dir))
        if a:
            try:
                # os.remove() cannot delete a directory, and the former
                # chmod(S_IWOTH) replaced the mode with "other-write" only,
                # removing the owner's own access before the delete.
                shutil.rmtree(self.targ_dir)
            except PermissionError:
                print("Permission is dine,after use chomod try to run with sudo")
        return

    train_videos = self.get_dir_frombase(self.org_train)
    print("fine to load train_data's name, length is {}".format(len(train_videos)))
    test_videos = self.get_dir_frombase(self.org_test)
    print("fine to load test_data's name, length is {}".format(len(test_videos)))
    all_videos = train_videos + test_videos

    if os.path.exists(self.targ_dir):
        # Any existing entry (converted output included) blocks a re-run.
        if os.listdir(self.targ_dir):
            print("The file might be exsist this function {} might not work"
                  .format(self.move_file_2_wav_n.__name__))
            return
    else:
        os.mkdir(self.targ_dir)

    for i, fp in enumerate(all_videos):
        # Copy and conversion now use the same path; previously the copy went
        # to targ_dir + "<i>.WAV" while the reader opened targ_dir/<i>.wav.
        copied = os.path.join(self.targ_dir, str(i) + ".wav")
        shutil.copy(fp, copied)
        converted = os.path.join(
            self.targ_dir, str(i) + self.sub_add + ".wav")
        SPHFile(copied).write_wav(filename=converted)
        print("fin {}".format(i))
def __getitem__(self, idx):
    """Return normalised, fixed-length MFCC features for item ``idx``.

    The audio is read from ``self.audio_root_path + self.audio_filenames[idx]``.
    If reading fails and the name ends in ``.wav``, the sibling ``.WAV`` file
    is converted with ``SPHFile`` to that path and the read is retried.

    Args:
        idx: Item index; a tensor index is converted with ``tolist()``.

    Returns:
        ``(features, nframes)`` where ``features`` is a ``torch.FloatTensor``
        of the transposed output of ``self.convert_to_fixed_length`` and
        ``nframes`` is the frame count capped at ``self.max_nframes``.
    """
    if torch.is_tensor(idx):
        idx = idx.tolist()

    audio_filename = self.audio_filenames[idx]
    wav_path = self.audio_root_path + audio_filename
    try:
        sr, y = io.wavfile.read(wav_path)
    except Exception:
        name_parts = audio_filename.split('.')
        if name_parts[-1] != 'wav':
            # No SPHERE fallback applies; surface the real read error instead
            # of failing later on an unbound ``sr``/``y``.
            raise
        audio_filename_sph = '.'.join(name_parts[:-1] + ['WAV'])
        sph = SPHFile(self.audio_root_path + audio_filename_sph)
        sph.write_wav(wav_path)
        sr, y = io.wavfile.read(wav_path)

    y = preemphasis(y, self.coeff)
    n_fft = int(self.window_ms * sr / 1000)
    hop_length = int(self.skip_ms * sr / 1000)
    # ``y`` is passed by keyword: recent librosa releases reject it positionally.
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=self.n_mfcc,
                                dct_type=self.dct_type, n_fft=n_fft,
                                hop_length=hop_length)
    # Global mean/variance normalisation; EPS guards against a zero variance.
    mfcc -= np.mean(mfcc)
    mfcc /= max(np.sqrt(np.var(mfcc)), EPS)

    nframes = min(mfcc.shape[1], self.max_nframes)
    mfcc = self.convert_to_fixed_length(mfcc)
    mfcc = mfcc.T
    return torch.FloatTensor(mfcc), nframes
def read_audio(current_file, sample_rate=None, mono=True):
    """Read an audio file, transparently handling ``.sph`` inputs.

    Parameters
    ----------
    current_file : dict
        Dictionary given by pyannote.database. ``current_file['audio']`` is
        the path; an optional ``'channel'`` key selects one (1-indexed)
        channel.
    sample_rate : int, optional
        Target sampling rate. Defaults to using native sampling rate.
    mono : bool, optional
        Convert multi-channel to mono. Defaults to True.

    Returns
    -------
    y : np.array
        Audio samples, transposed from librosa's channel-first layout.
    sample_rate : int
        Sampling rate reported by ``librosa.load``.
    """
    audio_path = current_file['audio']

    if audio_path[-4:] != '.sph':
        # Regular files are decoded directly.
        y, sample_rate = librosa.load(audio_path, sr=sample_rate, mono=False)
    else:
        # Sphere files are dumped to a temporary wav file and loaded from
        # there.
        from sphfile import SPHFile
        sphere = SPHFile(audio_path)
        with tempfile.NamedTemporaryFile() as tmp:
            sphere.write_wav(tmp.name)
            y, sample_rate = librosa.load(tmp.name, sr=sample_rate,
                                          mono=False)

    # Mono files come back as (n,); normalise them to (1, n).
    if y.ndim == 1:
        y = y.reshape(1, -1)

    # Keep only the requested channel, if any.
    channel = current_file.get('channel', None)
    if channel is not None:
        y = y[channel - 1, :]

    if mono:
        y = librosa.to_mono(y)

    return y.T, sample_rate
def load_audio(self, idx):
    """Load acoustic features and phone-boundary markers for one item.

    ``idx`` is first mapped through ``self.keep_indices``. Feature settings
    are read from ``self.configs`` and stored on ``self``. When
    ``self.audio_root_path`` ends in ``json`` the features are loaded with
    ``kaldiio``; otherwise a mel spectrogram is computed from
    ``<audio_root_path>/<key>.wav``, converting the sibling ``.WAV`` SPHERE
    file first if the wav cannot be read.

    Returns:
        ``(features, phone_boundary)`` when ``self.return_boundary`` is set,
        otherwise ``(features, nframes)``. ``features`` is a
        ``torch.FloatTensor``; ``phone_boundary`` has shape
        ``(2, max_nframes + 1)`` with start marks in row 0 and end marks in
        row 1.
    """
    idx = self.keep_indices[idx]

    # Extract segment-level acoustic features
    self.n_mfcc = self.configs.get('n_mfcc', 40)
    self.coeff = self.configs.get('coeff', 0.97)
    self.dct_type = self.configs.get('dct_type', 3)
    self.skip_ms = self.configs.get('skip_size', 10)
    self.window_ms = self.configs.get('window_len', 25)

    phone_boundary = np.zeros((2, self.max_nframes + 1))
    for start_ms, end_ms in self.segmentations[idx]:
        # Segment times are converted to frame indices at 10 per unit.
        start_frame = int(float(start_ms) / 10)
        end_frame = int(float(end_ms) / 10)
        if end_frame > self.max_nframes:
            break
        phone_boundary[0, start_frame] = 1.
        phone_boundary[1, end_frame] = 1.

    if self.audio_root_path.split('.')[-1] == 'json':
        # Assume kaldi format if audio_root_path is a json file
        mfcc = kaldiio.load_mat(self.audio_keys[idx])
        nframes = min(mfcc.shape[1], self.max_nframes)
        mfcc = self.convert_to_fixed_length(mfcc.T)
    else:
        wav_path = '{}/{}.wav'.format(self.audio_root_path,
                                      self.audio_keys[idx])
        try:
            sr, y_wav = wavfile.read(wav_path)
        except Exception:
            # Fall back to the SPHERE original. The converted audio must be
            # written to the ``.wav`` path that is read next; it used to be
            # written over the SPHERE source, and with a different path
            # layout than the first read.
            sph_path = '{}/{}.WAV'.format(self.audio_root_path,
                                          self.audio_keys[idx])
            SPHFile(sph_path).write_wav(wav_path)
            sr, y_wav = wavfile.read(wav_path)

        y_wav = preemphasis(y_wav, self.coeff)
        n_fft = int(self.window_ms * sr / 1000)
        hop_length = int(self.skip_ms * sr / 1000)
        # ``y`` is passed by keyword: recent librosa releases reject it
        # positionally.
        mfcc = librosa.feature.melspectrogram(y=y_wav, sr=sr,
                                              n_mels=self.n_mfcc,
                                              n_fft=n_fft,
                                              hop_length=hop_length)
        # Global mean/variance normalisation; EPS guards a zero variance.
        mfcc -= np.mean(mfcc)
        mfcc /= max(np.sqrt(np.var(mfcc)), EPS)
        nframes = min(mfcc.shape[1], self.max_nframes)
        mfcc = self.convert_to_fixed_length(mfcc)

    mfcc = torch.FloatTensor(mfcc)
    if phone_boundary.sum() == 0:
        print('Warning: Caption {} with id {} is empty'.format(
            idx, self.audio_keys[idx]))

    phone_boundary = torch.FloatTensor(phone_boundary)
    if self.return_boundary:
        return mfcc, phone_boundary
    return mfcc, nframes
def GetArrayFromWAV(filename):
    """Read audio samples from a RIFF or NIST SPHERE file.

    The container is detected from the first four bytes: ``b'RIFF'`` is read
    with ``WavFileTool.read``; anything else is treated as SPHERE.

    Args:
        filename: Path of the audio file.

    Returns:
        ``(framerate, wavArray)``. For SPHERE input ``wavArray`` is an
        ``int16`` numpy array.
    """
    with open(filename, 'rb') as wavFile:
        header = wavFile.read(4)

    if header == b'RIFF':
        # RIFF header, for WAVE files
        framerate, wavArray = WavFileTool.read(filename)
    else:
        # NIST header, which uses SPHERE
        sph = SPHFile(filename)
        framerate = sph.format['sample_rate']
        # One bulk conversion instead of calling time_range() twice and
        # copying sample by sample.
        # NOTE(review): assumes time_range() yields a 1-D sequence - confirm.
        wavArray = numpy.array(sph.time_range(), dtype=numpy.int16)
    return framerate, wavArray
def nist2wav(wav_file):
    """Convert one SPHERE ``.WAV`` file to a RIFF ``.wav`` file.

    The sibling ``.TXT`` file (same stem) is read; the first two fields of
    each line are taken as start/end offsets, divided by 16000, and passed to
    ``SPHFile.write_wav`` together with the ``.wav`` output name.

    Args:
        wav_file: Path of the SPHERE file, ending in ``.WAV``.
    """
    sph = SPHFile(wav_file)
    txt_file = wav_file[:-3] + "TXT"
    # ``with`` guarantees the transcript handle is closed.
    with open(txt_file, 'r') as f:
        for line in f:
            words = line.split()
            if len(words) < 2:
                # Blank or malformed line: nothing to convert.
                continue
            # assumes offsets are sample indices at 16 kHz
            start_time = int(words[0]) / 16000
            end_time = int(words[1]) / 16000
            print("writing file ", wav_file)
            sph.write_wav(wav_file.replace(".WAV", ".wav"),
                          start_time, end_time)
def convert_and_save(meta, speech_samples, path, mode):
    """Cut each utterance listed in ``meta`` out of its SPHERE recording.

    Args:
        meta: Table iterated with ``iterrows()``; each row provides
            ``sent_id``, ``start_time`` and ``end_time``.
        speech_samples: Candidate SPHERE file paths. The first path containing
            ``'sw0'`` plus the last four characters of the ``sent_id`` prefix
            (the part before the first ``_``) is used.
        path: Output root directory.
        mode: Sub-directory of ``path``; files go to ``<path>/<mode>/speech``.
    """
    speech_dir = os.path.join(path, mode, 'speech')
    if not os.path.exists(speech_dir):
        os.makedirs(speech_dir, exist_ok=True)

    for _, row in meta.iterrows():
        sent_id = row['sent_id']
        recording_tag = 'sw0' + sent_id.split('_')[0][-4:]
        candidates = [fname for fname in speech_samples
                      if recording_tag in fname]
        recording = SPHFile(candidates[0])
        target = os.path.join(speech_dir, sent_id + '.wav')
        recording.write_wav(target, row['start_time'], row['end_time'])
def Pattern_File_Generate_from_SPH(path, text_List, token_Index_Dict, dataset,
                                   spectral_Subtract=False, display_Prefix='',
                                   range_Ignore=False):
    """Create one pickled training pattern per segment of a SPHERE file.

    Each ``(start, end, text)`` entry of ``text_List`` is cut out into a
    temporary wav under ``<tempdir>/mstts``, converted with ``Mel_Generate``
    and pickled into ``hp.Train.Pattern_Path``.

    Args:
        path: Path of the SPHERE file.
        text_List: Iterable of ``(start_Time, end_Time, text)`` tuples.
        token_Index_Dict: Maps each character of ``text`` to its token id.
        dataset: Dataset label stored in the pattern and in the file name.
        spectral_Subtract: Unused by this function.
        display_Prefix: Prefix printed in progress messages.
        range_Ignore: Unused by this function.
    """
    sph_Loader = SPHFile(path)
    os.makedirs(
        os.path.join(tempfile.gettempdir(), 'mstts').replace('\\', '/'),
        exist_ok=True)
    file_Stem = os.path.splitext(os.path.basename(path))[0]

    for index, (start_Time, end_Time, text) in enumerate(text_List):
        temp_Wav_Path = os.path.join(
            tempfile.gettempdir(), 'mstts',
            '{}.{}.wav'.format(file_Stem, index)).replace('\\', '/')
        sph_Loader.write_wav(temp_Wav_Path, start_Time, end_Time)
        try:
            mel = Mel_Generate(temp_Wav_Path)
        finally:
            # The temporary wav is only needed for mel extraction; remove it
            # on every path, including the "ignored" one.
            os.remove(temp_Wav_Path)

        if mel is None:
            print('[{}]'.format(display_Prefix),
                  '{} {}-{}'.format(path, start_Time, end_Time),
                  '->', 'Ignored because of length.')
            # Skip only this segment; returning here dropped every remaining
            # segment of the recording.
            continue

        token = np.array(
            [token_Index_Dict[letter] for letter in text]).astype(np.int32)
        new_Pattern_Dict = {
            'Token': token,
            'Mel': mel,
            'Text': text,
            'Dataset': dataset,
        }
        pickle_File_Name = '{}.{}.{}.PICKLE'.format(
            dataset, file_Stem, index).upper()
        with open(
                os.path.join(hp.Train.Pattern_Path,
                             pickle_File_Name).replace("\\", "/"),
                'wb') as f:
            pickle.dump(new_Pattern_Dict, f, protocol=2)

        print('[{}]'.format(display_Prefix),
              '{} {}-{}'.format(path, start_Time, end_Time),
              '->', '{}'.format(pickle_File_Name))
def timit_sph2wav(path):
    """Convert TIMIT SPHERE ``.WAV`` files to RIFF in place.

    Files matching ``<path>TIMIT/*/*/*/*.WAV`` are rewritten as RIFF under
    the same name. ``<path>TIMIT/done.txt`` is written afterwards and makes
    later calls a no-op.

    Args:
        path: Prefix placed directly in front of ``TIMIT/``; it is
            concatenated as-is, so it needs its own trailing separator.

    Returns:
        None.
    """
    done_marker = path + r'TIMIT/done.txt'
    if os.path.exists(done_marker):
        print('The dataset has been already preprocessed')
        return None

    for file_path in glob.glob(path + r'TIMIT/*/*/*/*.WAV'):
        converted_path = file_path.replace('.WAV', 'copy.WAV')
        SPHFile(file_path).write_wav(filename=converted_path)
        # os.replace swaps the converted file in as one step, so the
        # utterance is never missing if the run is interrupted between a
        # separate remove() and rename().
        os.replace(converted_path, file_path)

    with open(done_marker, 'w') as f:
        f.write('The dataset has been preprocessed')
    print('Done')
    return None
def from_sphere(sph_path: Pathlike, relative_path_depth: Optional[int] = None) -> 'Recording':
    """
    Build a ``Recording`` from the header of a SPHERE file.

    :param sph_path: Path to the sphere (.sph) file.
    :param relative_path_depth: optional int specifying how many last parts of the file path
        should be retained in the ``AudioSource``. By default writes the path as is.
    :return: a new ``Recording`` instance pointing to the sphere file.
    """
    from sphfile import SPHFile

    sph_path = Path(sph_path)
    header = SPHFile(sph_path).format
    sample_rate = header['sample_rate']
    sample_count = header['sample_count']

    # Keep only the trailing path components when a positive depth is given.
    if relative_path_depth is not None and relative_path_depth > 0:
        source_path = '/'.join(sph_path.parts[-relative_path_depth:])
    else:
        source_path = str(sph_path)

    audio_source = AudioSource(
        type='file',
        channels=list(range(header['channel_count'])),
        source=source_path,
    )
    return Recording(
        id=sph_path.stem,
        sampling_rate=sample_rate,
        num_samples=sample_count,
        duration=sample_count / sample_rate,
        sources=[audio_source],
    )
def cache_sph2wav(self):
    """Return the path of a cached WAV excerpt, creating it when missing.

    The excerpt of ``self._source_file`` between ``self._start_time`` and
    ``self._stop_time`` is written to
    ``<cwd>/datacache/speech_objects/wav/<speech_id>.wav`` (for sending to
    the Google Cloud Speech API). An existing cache file is reused as is.
    """
    wav_cache_dir = os.path.join(
        os.getcwd(), 'datacache', 'speech_objects', 'wav/')
    if not os.path.exists(wav_cache_dir):
        os.makedirs(wav_cache_dir, exist_ok=True)

    speech_id = self._speech_id.strip()
    cache_file = os.path.join(wav_cache_dir, '{}.wav'.format(speech_id))
    if os.path.exists(cache_file):
        return cache_file

    # Write out a wav file with content from start_time to stop_time seconds.
    SPHFile(self._source_file).write_wav(
        cache_file, self._start_time, self._stop_time)
    return cache_file
def NIST_to_wav(input_directory, output_directory):
    """
    convert files in directory to wav files

    Parameters
    ----------
    input_directory: str
        glob pattern matching all files to be converted
    output_directory: str
        directory to output all converted files
    """
    # Function-scope import keeps this block self-contained.
    import os

    for f in glob.glob(input_directory):
        # basename/splitext handle names without an extension and directories
        # containing dots, which the rfind('/') / rfind('.') slicing did not.
        name = os.path.splitext(os.path.basename(f))[0]
        # os.path.join works with or without a trailing separator.
        SPHFile(f).write_wav(os.path.join(output_directory, name + ".wav"))
def transform_spkinfo(targetfp, spk_list):
    """Convert every ``.WAV`` file in a two-level tree into ``./all_wav/``.

    The tree is walked as ``<targetfp>/<group>/<speaker>/<file>``. Each file
    whose name contains ``.WAV`` is written to
    ``./all_wav/<speaker index>_<n>.wav`` where the index is the speaker's
    position in ``spk_list`` and ``n`` counts that speaker's files from 0.

    Args:
        targetfp: Root directory of the tree.
        spk_list: List of speaker directory names.

    Raises:
        ValueError: If a speaker does not have exactly 10 matching files, or
            (from ``list.index``) if a speaker is missing from ``spk_list``.
    """
    for group in os.listdir(targetfp):
        group_dir = targetfp + "/" + group
        for speaker in os.listdir(group_dir):
            spk_index = spk_list.index(speaker)
            print(spk_index)

            speaker_dir = targetfp + "/" + group + "/" + speaker
            converted = 0
            for entry in os.listdir(speaker_dir):
                if ".WAV" not in entry:
                    continue
                out_name = ("./all_wav/" + str(spk_index) + "_"
                            + str(converted) + ".wav")
                SPHFile(speaker_dir + "/" + entry).write_wav(filename=out_name)
                converted += 1

            if converted != 10:
                raise ValueError("Not enough speech")
def load_sphfile(path, sampling_rate, frame_time, hop_time):
    """Return a Wav instance based on the data stored in a Sphere file.

    The Sphere file is converted to a temporary waveform file in the current
    directory, loaded through ``Wav``, and the temporary file is removed.

    Args:
        path: Path of the Sphere file; its last four characters are dropped
            to build the temporary file name.
        sampling_rate: Forwarded to ``Wav``.
        frame_time: Forwarded to ``Wav``.
        hop_time: Forwarded to ``Wav``.
    """
    # Build a temporary copy of the file, converted to actual waveform format.
    tmp_path = './' + os.path.basename(path[:-4]) + '_tmp.wav'
    try:
        SPHFile(path).write_wav(tmp_path)
        # Load data from the waveform file.
        wav = Wav(tmp_path, sampling_rate, frame_time, hop_time)
    finally:
        # Always clean up, even when conversion or loading raises.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    return wav
def convert_wav(path):
    """Convert the NIST sphere files of every speaker folder to wav files.

    For each entry of ``path`` a ``wav_files`` sub-folder is created and every
    file whose path matches ``.*\\.WAV`` is converted into it under the same
    file name. Speakers that already have a ``wav_files`` folder are skipped.

    Args:
        path: path to dialect folder in timit directory
    """
    for speaker_name in os.listdir(path):
        speaker = os.path.join(path, speaker_name)

        # Folder that receives the converted files; its presence marks the
        # speaker as already processed.
        wav_folder = os.path.join(speaker, 'wav_files')
        if os.path.exists(wav_folder):
            continue

        # Snapshot the speaker's files before the output folder is created.
        sentence_list = [os.path.join(speaker, entry)
                         for entry in os.listdir(speaker)]
        os.makedirs(wav_folder)

        for candidate in sentence_list:
            if not re.match(r'.*\.WAV', candidate):
                continue
            file_name = candidate.split(os.path.sep)[-1]
            SPHFile(candidate).write_wav(os.path.join(wav_folder, file_name))
def main():
    """Convert every file below a source directory to WAV.

    ``sys.argv[1]`` is the source directory (walked recursively) and
    ``sys.argv[2]`` the destination directory. Each file is opened with
    ``SPHFile`` and passed to ``create_wav``; the output name is the part of
    the file name before its first ``.`` plus ``.wav``, written flat into the
    destination directory.
    """
    source_directory = sys.argv[1]
    destination_directory = sys.argv[2]

    for dirpath, _dirs, filenames in os.walk(source_directory):
        for f in filenames:
            # Join with the directory the file was found in; joining with the
            # top-level directory broke every file inside a sub-directory.
            sph = SPHFile(os.path.join(dirpath, f))
            dest = f.split('.')[0] + '.wav'
            create_wav(sph, os.path.join(destination_directory, dest))
def copy_wavs(dest_folder):
    """Copy wav files, converting them from sph file format on the go.

    For every speaker in ``SPEAKERS`` the ``.wav`` files of
    ``CONSTANTS['mocha_raw_folder']/<speaker>`` are processed in sorted order
    and written to ``dest_folder`` under the same file name.

    Returns:
        The copied utterance names without their ``.wav`` suffix, in
        processing order.
    """
    copied = []
    raw_root = CONSTANTS['mocha_raw_folder']
    for speaker in SPEAKERS:
        folder = os.path.join(raw_root, speaker)
        wav_names = sorted(
            name for name in os.listdir(folder) if name.endswith('.wav'))
        for wav_name in wav_names:
            source = SPHFile(os.path.join(folder, wav_name))
            source.write_wav(os.path.join(dest_folder, wav_name))
        # Record the utterance names, dropping the four-character suffix.
        copied.extend(wav_name[:-4] for wav_name in wav_names)
    return copied
def convert(args):
    """Split TED-LIUM 3 SPHERE talks into per-segment wavs plus a label file.

    For every ``<root>/sph/*.sph`` file the matching ``<root>/stm/<name>.stm``
    is read line by line. Fields 3 and 4 of a line are the segment start and
    end, field 0 is the talk name; the remainder is cleaned into the
    transcript. Segments are written to ``<root>/wav/<name>_<idx>.wav`` and
    ``<root>/wav/labels.txt`` receives one ``"<wav file> <text>"`` line each.

    Args:
        args: Object with a ``save_path`` attribute; ``root`` is
            ``<save_path>/TEDLIUM_release-3/data``.
    """
    PAUSE_MATCH = re.compile(r'\([0-9]\)')
    NOTATION = re.compile(r'\{[A-Z]*\}')
    print('Converting .sph to wav')

    labels = []
    root = os.path.join(args.save_path, 'TEDLIUM_release-3', 'data')
    wav_dir = os.path.join(root, 'wav')
    stm_dir = os.path.join(root, 'stm')
    os.makedirs(wav_dir, exist_ok=True)
    sph_files = sorted(glob.glob(os.path.join(root, 'sph/*.sph')))

    with tqdm(sph_files, dynamic_ncols=True, desc="data") as pbar:
        for sph_file in pbar:
            sph = SPHFile(sph_file)
            # Build the transcript path from its parts. A blanket
            # str.replace('sph', 'stm') also rewrote any "sph" occurring in
            # parent directories or in the talk name itself.
            talk = os.path.splitext(os.path.basename(sph_file))[0]
            stm_file = os.path.join(stm_dir, talk + '.stm')
            with open(stm_file, 'r') as f:
                for idx, line in enumerate(f):
                    tokens = line.split(' ')
                    start, end = float(tokens[3]), float(tokens[4])
                    name = tokens[0]

                    # Drop everything up to the speaker tag, then markers.
                    text = line.split('male> ')[-1]
                    text = text.split('unknown> ')[-1]
                    text = text.split('NA> ')[-1]
                    text = text.replace('<sil>', '')
                    text = text.replace('<unk>', '')
                    text = text.split('(' + name)[0]
                    text = PAUSE_MATCH.sub('', text)
                    text = NOTATION.sub('', text)
                    # Collapse runs of whitespace into single spaces.
                    text = ' '.join(text.split())

                    wav_filename = '%s_%d.wav' % (name, idx)
                    assert ' ' not in wav_filename
                    sph.write_wav(os.path.join(wav_dir, wav_filename),
                                  start, end)
                    labels.append('%s %s' % (wav_filename, text))

    with open(os.path.join(wav_dir, 'labels.txt'), 'w') as f:
        f.write('\n'.join(labels))
def timit_trans(timit_root='/home/lisen/uestc/Research/Dataset/TIMIT'):
    """Convert TIMIT SPHERE files to RIFF wav files and delete the originals.

    A freshly downloaded TIMIT may not be directly usable; this converts it.
    Every ``<timit_root>/{TRAIN,TEST}/*/*/*.WAV`` file is written as
    ``<stem>_.wav`` next to it and the SPHERE original is then removed.

    Args:
        timit_root: TIMIT root directory holding ``TRAIN`` and ``TEST``.
            Defaults to the previously hard-coded location.
    """
    for split, label in (('TRAIN', 'train'), ('TEST', 'test')):
        sph_files = glob.glob(timit_root + '/' + split + '/*/*/*.WAV')
        print(len(sph_files), label + " utterences")
        for sph_path in sph_files:
            # The underscore in the output name must not be removed.
            # NOTE(review): presumably it keeps the output from colliding
            # with the source on case-insensitive file systems - confirm.
            SPHFile(sph_path).write_wav(
                filename=sph_path.replace(".WAV", "_.wav"))
            os.remove(sph_path)
    print("Completed")
def get_length_of_sph_file(sph_path):
    """
    Extracts the duration of a sph file in seconds

    ```python
    import pathlib
    from openspeechlib.utils.audio import get_length_of_sph_file

    total_length_of_ciempiess = 0
    for path in pathlib.Path('/mnt/16810535-988c-440c-a794-1c9b98899844/master_thesis/corpus/02_CIEMPIESS_SPH/train').rglob('*.sph'):
        total_length_of_ciempiess += get_length_of_sph_file(path.absolute())

    print(total_length_of_ciempiess)
    ```

    :param sph_path: path of the sph file
    :return: ``sample_count / sample_rate`` from the file header, or ``None``
        (after logging an error) when the file does not exist
    """
    try:
        sph_file = SPHFile(sph_path)
        sph_file.open()
        header = sph_file.format
        duration = header['sample_count'] / header['sample_rate']
    except FileNotFoundError:
        LOGGER.error("File not found")
        return None
    return duration
def make_recording_callhome(
        sph_path: Pathlike,
        recording_id: Optional[str] = None,
        relative_path_depth: Optional[int] = None,
        sph2pipe_path: Optional[Pathlike] = None
) -> Recording:
    """
    Create the ``Recording`` manifest for one CallHome SPHERE file.

    CallHome recordings are compressed with shorten, a rare and mostly
    unsupported codec, so the audio source is a ``sph2pipe`` command rather
    than a plain file. You will need to install sph2pipe (e.g. using Kaldi)
    in order to read these files.

    :param sph_path: path to the SPHERE file.
    :param recording_id: id of the recording; defaults to the file stem.
    :param relative_path_depth: how many trailing path parts to keep in the
        command; by default the path is used as is.
    :param sph2pipe_path: sph2pipe executable; defaults to ``sph2pipe``.
    :return: a ``Recording`` whose single source is of type ``command``.
    """
    try:
        from sphfile import SPHFile
    except ImportError:
        raise ImportError("Please install sphfile (pip install sphfile) instead and "
                          "try preparing CallHome English again.")

    sph2pipe_path = 'sph2pipe' if sph2pipe_path is None else str(sph2pipe_path).strip()

    sph_path = Path(sph_path)
    header = SPHFile(sph_path).format
    sample_rate = header['sample_rate']
    sample_count = header['sample_count']

    # Keep only the trailing path components when a positive depth is given.
    if relative_path_depth is not None and relative_path_depth > 0:
        audio_path = '/'.join(sph_path.parts[-relative_path_depth:])
    else:
        audio_path = str(sph_path)

    command_source = AudioSource(
        type='command',
        channels=list(range(header['channel_count'])),
        source=f'{sph2pipe_path} -f wav -p ' + audio_path,
    )
    return Recording(
        id=sph_path.stem if recording_id is None else recording_id,
        sampling_rate=sample_rate,
        num_samples=sample_count,
        duration=sample_count / sample_rate,
        sources=[command_source],
    )
for root, _, fnames in sorted(os.walk(args.timit_dir)): spk = root.split("/")[-1].lower() if spk in dt_spks: f = dt_f elif spk in tt_spks: f = tt_f else: f = tr_f for fname in fnames: if fname.endswith(".wav") or fname.endswith(".WAV"): sph_path = "%s/%s" % (root, fname) path = "%s/%s_%s" % (wav_dir, spk, fname) uttid = "%s_%s" % (spk, os.path.splitext(fname)[0]) f.write("%s %s\n" % (uttid, path)) sph = SPHFile(sph_path) write_wav(path, sph) tr_f.close() dt_f.close() tt_f.close() print "converted to wav and dumped scp files" # compute feature feat_dir = os.path.abspath("%s/%s" % (args.out_dir, args.ftype)) maybe_makedir(feat_dir) def compute_feature(name): cmd = [
def sph_to_wav(dirpath, filename, output_path):
    """Convert one SPHERE file to a RIFF wav file.

    Args:
        dirpath: Directory containing the SPHERE file.
        filename: Name of the SPHERE file inside ``dirpath``.
        output_path: Directory that receives the converted file.

    Returns:
        Path of the written file: ``<output_path>/<stem>.wav``.
    """
    full_path = os.path.join(dirpath, filename)
    # splitext drops the extension together with its dot; slicing off three
    # characters kept the dot and produced names such as "utt..wav".
    stem = os.path.splitext(filename)[0]
    new_path = os.path.join(output_path, stem + ".wav")
    SPHFile(full_path).write_wav(new_path)
    return new_path
# Convert every TIMIT SPHERE ``*.WAV`` file below ``dialects_path`` to a RIFF
# ``.wav`` file, using the offsets stored in the sibling ``.TXT`` file.
from sphfile import SPHFile
import glob
import os

dialects_path = "/home/marcin/Pobrane/TIMIT"
root_dir = os.path.join(dialects_path, '**/*.WAV')
wav_files = glob.glob(root_dir, recursive=True)

for wav_file in wav_files:
    sph = SPHFile(wav_file)
    txt_file = wav_file[:-3] + "TXT"
    # ``with`` guarantees the transcript handle is closed.
    with open(txt_file, 'r') as f:
        for line in f:
            words = line.split()
            if len(words) < 2:
                # Blank or malformed line: nothing to convert.
                continue
            # assumes offsets are sample indices at 16 kHz
            start_time = int(words[0]) / 16000
            end_time = int(words[1]) / 16000
            print("writing file ", wav_file)
            sph.write_wav(wav_file.replace(".WAV", ".wav"),
                          start_time, end_time)
from sphfile import SPHFile

# Command line: <src_dir> holding "sph/" and "stm/" sub-folders, and
# <dest_dir> receiving one .wav and one .txt per SPHERE file.
parser = argparse.ArgumentParser()
parser.add_argument("src_dir")
parser.add_argument("dest_dir")
args = parser.parse_args()

src = Path(args.src_dir)
dest = Path(args.dest_dir)

for file_sph in (src / "sph").iterdir():
    file_stm = src / "stm" / file_sph.name.replace(".sph", ".stm")
    dest.mkdir(parents=True, exist_ok=True)

    # Convert the whole recording to wav.
    sph = SPHFile(str(file_sph))
    sph.write_wav(str(dest / file_sph.name.replace(".sph", ".wav")))

    with open(file_stm, "r") as f:
        output = []
        # NOTE(review): ground_truth is filled but not used in the visible
        # code - confirm whether later code reads it.
        ground_truth = []
        for line in f:
            line_parts = line.split()
            # The transcript starts at the seventh whitespace-separated field.
            line = " ".join(line_parts[6:])
            if line == "ignore_time_segment_in_scoring":
                continue
            output.append(line)
            ground_truth.append(
                " ".join([line_parts[3], line_parts[4]] + line_parts[6:]))

    with open(str(dest / file_sph.name.replace(".sph", ".txt")), "w") as f:
        f.write("\n".join(output))
def load_sph(f):
    """Open a SPHERE file and return ``(sample_rate, content)``.

    ``sample_rate`` is read from the header's ``format`` mapping and
    ``content`` is the ``SPHFile.content`` attribute.
    """
    from sphfile import SPHFile

    sphere = SPHFile(f)
    sample_rate = sphere.format['sample_rate']
    samples = sphere.content
    return (sample_rate, samples)
import os
import sys
#sys.path.append("tracker/deep_sort/deep_sort")
import numpy as np
from path import Path
from sphfile import SPHFile

# Mirror every *.WAV file found under speech_TEST/TIMIT_TEST into a sibling
# tree whose root is the great-grandparent directory name plus "_WAV",
# keeping the two innermost directory levels and the file name.
for file_path in Path('speech_TEST/TIMIT_TEST').walkfiles('*.WAV'):
    parent_dir = os.path.dirname(file_path)
    grandparent_dir = os.path.dirname(parent_dir)
    source_root = os.path.dirname(grandparent_dir)

    save_path = (source_root + '_WAV/'
                 + os.path.basename(grandparent_dir) + '/'
                 + os.path.basename(parent_dir) + '/'
                 + os.path.basename(file_path))
    save_dir = os.path.dirname(save_path)

    try:
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
    except OSError:
        print('Error: Creating directory of data')

    sph = SPHFile(file_path)
    sph.write_wav(save_path)