def _generate_index(self):
    """Resample the input files to 16 kHz and write the octave-pair index.

    Every file found directly inside ``<self._dir>/in`` is resampled with
    sox into ``<self._dir>/out`` and the resampled copy then overwrites the
    original in ``in``. The note number is read from characters 12-14 of
    the file name (a trailing ``.`` is dropped for two-digit notes). For
    each note not greater than 96 one line ``<input>.wav,<target>.wav`` is
    written to ``<self._dir>/<self._name>``, where the target is the same
    12-character prefix with the note number raised by 12.

    A missing input directory is treated as containing no files.

    Raises:
        ValueError: if the note part of a file name is not an integer.
    """
    in_dir = os.path.join(self._dir, "in")
    out_dir = os.path.join(self._dir, "out")

    # The context manager closes the index even when sox, the copy or the
    # file-name parsing fails part-way through.
    with open(os.path.join(self._dir, self._name), "w") as target_file:
        self._make_dirs()

        # Only the file names at the top level are needed, so take the
        # first entry from os.walk instead of walking the whole tree.
        files = next(os.walk(in_dir), (in_dir, [], []))[2]

        for file in files:
            in_path = os.path.join(in_dir, file)
            out_path = os.path.join(out_dir, file)

            # Resample to 16 kHz into the out dir. The arguments are passed
            # as a list so that paths containing whitespace stay intact.
            sox(["sox", in_path, out_path, "rate", "16000"])

            # Overwrite the high-sample-rate file with the resampled one.
            shutil.copy(out_path, in_path)

            # The note number sits in the file name; most are '##.', in
            # which case the trailing '.' is removed.
            note = file[12:15]
            if note.endswith('.'):
                note = note[:-1]
            pitch = int(note)

            # Above 96 the output note (pitch + 12) is not in the dataset.
            if pitch > 96:
                continue

            # Octaves are 12 semitones apart, so the target is 12 higher.
            prefix = file[:12]
            print("{0}{1}.wav,{0}{2}.wav".format(prefix, pitch, pitch + 12),
                  file=target_file)
def splitFile(fileName):
    """Split ``fileName`` on silence by running the sox ``silence`` effect.

    The argument list handed to ``sx.sox`` is::

        <fileName> outfile.wav silence 1 0.1 0.1 1 0.1 0.1 : newfile : restart

    - The first ``1 0.1 0.1`` triple controls when a segment starts and the
      second when it stops, each using a 0.1 duration and a 0.1 threshold.
    - ``: newfile`` followed by ``: restart`` repeats the effect chain so
      the whole input is processed in a single call.
    - Output is written to the working directory as sequentially numbered
      files derived from ``outfile.wav`` (outfile001, outfile002, ...).
    """
    # The start and stop rules use the same three values.
    silence_rule = ['1', '0.1', '0.1']
    chain_restart = [': newfile', ': restart']

    args = [fileName, 'outfile.wav', 'silence']
    args += silence_rule
    args += silence_rule
    args += chain_restart

    sx.sox(args)
def test_sox_fail_bad_args(self):
    """An unknown option yields status 1 with non-empty out and err."""
    status, out, err = core.sox(['-asdf'])

    self.assertEqual(1, status)
    # Both captured streams are expected to carry some text here.
    self.assertNotEqual('', out)
    self.assertNotEqual('', err)
def test_sox_fail_corrupt_file(self):
    """A corrupt input yields status 2, empty out and non-empty err."""
    result = core.sox([INPUT_FILE_CORRUPT, OUTPUT_FILE])
    status, out, err = result

    self.assertEqual(2, status)
    self.assertEqual('', out)
    self.assertNotEqual('', err)
def test_sox_fail_bad_ext(self):
    """An '.xyz' output name yields status 2, empty out and non-empty err."""
    result = core.sox(['input.wav', 'output.xyz'])
    status, out, err = result

    self.assertEqual(2, status)
    self.assertEqual('', out)
    self.assertNotEqual('', err)
def test_sox_fail_bad_files(self):
    """The given file names yield status 2, empty out and non-empty err."""
    result = core.sox(['asdf.wav', 'flululu.wav'])
    status, out, err = result

    self.assertEqual(2, status)
    self.assertEqual('', out)
    self.assertNotEqual('', err)
def test_src_array_invalid(self):
    """Passing a plain string as the source array yields status 1."""
    bogus_array = 'not a numpy array'

    # Only the status matters here; out and err are ignored.
    status = core.sox(['input.wav', 'output.xyz'], bogus_array)[0]

    self.assertEqual(1, status)
def test_base_case2(self):
    """A plain input/output call returns status 0 with empty out and err."""
    outcome = core.sox([INPUT_FILE, OUTPUT_FILE])

    self.assertEqual((0, '', ''), outcome)
def test_base_case_pathlib(self):
    """``Path`` arguments after a leading 'sox' return (0, '', '')."""
    source = Path(INPUT_FILE)
    destination = Path(OUTPUT_FILE)

    outcome = core.sox(['sox', source, destination])

    self.assertEqual((0, '', ''), outcome)
os.stat(os.path.join(Base_Dir, str(counter))) except: os.mkdir(os.path.join(Base_Dir, str(counter))) counter += 1 counter = 0 for file in glob.glob("*.wav"): if os.path.getsize(file) > 100: args = [file, 'out.wav', 'silence', '1', '0.1', '0.1', '1', '0.1', '0.1', ': newfile', ': restart'] sx.sox(args) pos = 0 for c in os.path.splitext(file)[0]: if pos < 9: out = "out00" + str(pos + 1) + ".wav" else: out = "out0" + str(pos + 1) + ".wav" new = str(counter) + ".wav" oldLoc = os.path.join(Base_Dir, new) newLoc = os.path.join(os.path.join(Base_Dir, str(c)), new) print "Old - " + out print "New - " + new os.rename(out, new) shutil.move(oldLoc, newLoc) pos += 1