Example #1
def downmix(src, dst):
    # randstr is a project helper that returns a random string, used here
    # to build unique temp-file names
    rand = randstr(20)
    # extract channel 1 (left) as a mono file
    tfm = sox.Transformer()
    tfm.remix({1: [1]})
    l1 = "/tmp/%s_l1.ogg" % rand
    tfm.build(src, l1)
    # extract channel 2 (center), attenuated by 10 dB
    tfm = sox.Transformer()
    tfm.remix({1: [2]})
    c1 = "/tmp/%s_c1.ogg" % rand
    tfm.gain(-10.0)
    tfm.build(src, c1)
    # extract channel 3 (right)
    tfm = sox.Transformer()
    tfm.remix({1: [3]})
    r1 = "/tmp/%s_r1.ogg" % rand
    tfm.build(src, r1)

    # fold the attenuated center into each side channel
    cbn = sox.Combiner()
    l2 = "/tmp/%s_l2.ogg" % rand
    cbn.build([l1, c1], l2, 'mix')
    cbn = sox.Combiner()
    r2 = "/tmp/%s_r2.ogg" % rand
    cbn.build([r1, c1], r2, 'mix')

    # merge the two mono mixes into a single stereo file
    cbn = sox.Combiner()
    cbn.build([l2, r2], dst, 'merge')
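
The snippet depends on a randstr helper that is not shown above. A minimal stand-in, purely illustrative (the source project's version may differ):

import random
import string

def randstr(length):
    # random lowercase string used to build unique temp-file names
    return ''.join(random.choices(string.ascii_lowercase, k=length))

# downmix('surround_3ch.ogg', 'stereo.ogg')  # hypothetical 3-channel input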
Example #2
def _trim_tracks_as_wav(extracted_tracks, trimlist, framerate, framenum,
                        offset_time, silent):
    try:
        import sox
    except ModuleNotFoundError:
        raise ModuleNotFoundError(
            'AudioProcessor.VideoSource: missing sox dependency for trimming.')
    framerate = Fraction(framerate)
    SPF = float(1.0 / framerate)
    trimfiles = []
    temp_outfiles = []
    for track in extracted_tracks:
        out_path_prefix = os.path.splitext(track)[0]
        outfile = f"{out_path_prefix}_cut.wav"
        trimfiles.append(outfile)
        if type(trimlist[0]) is list and len(trimlist) > 1:
            # keep this track's temp files separate so only they are concatenated
            track_temp_outfiles = []
            for index, trim in enumerate(trimlist, start=1):
                temp_outfile = f"{out_path_prefix}_temp{index}.wav"
                track_temp_outfiles.append(temp_outfile)
                _sox_trim(track, temp_outfile, trim, framenum, offset_time,
                          SPF, silent)
            temp_outfiles.extend(track_temp_outfiles)
            cbn = sox.Combiner()
            if silent:
                cbn.set_globals(verbosity=0)
            cbn.set_input_format(file_type=['wav'] * len(track_temp_outfiles))
            cbn.build(track_temp_outfiles, outfile, 'concatenate')
        elif type(trimlist[0]) is int or type(trimlist[0]) is type(None):
            _sox_trim(track, outfile, trimlist, framenum, offset_time, SPF,
                      silent)
    return trimfiles, temp_outfiles
Example #3
    def checker_track(self,
            output_file_name,
            gap=1.0,
            repeat_count=5,
            mute_first=False,
            mute_last=False):
        """Repeat the sample on alternating tracks so the fade in and out can overlap."""

        track_a_file = self.temp_folder + 'track-a.wav'
        track_b_file = self.temp_folder + 'track-b.wav'

        half, remainder = divmod(repeat_count, 2)
        track_a_repeat_count = half + remainder - 1
        track_b_repeat_count = half - 1

        if mute_last:
            if remainder:
                # there are an odd number of repeats, so the muted last repetition is in track A
                self.make_track(track_a_file, gap, track_a_repeat_count, mute_last=mute_last)
                self.make_track(track_b_file, gap, track_b_repeat_count, has_initial_rest=True)
            else:
                # there are an even number of repeats, so the muted last repetition is in track B
                self.make_track(track_a_file, gap, track_a_repeat_count)
                self.make_track(track_b_file, gap, track_b_repeat_count, has_initial_rest=True, mute_last=mute_last)

        else:
            self.make_track(track_a_file, gap, track_a_repeat_count, mute_first=mute_first)
            self.make_track(track_b_file, gap, track_b_repeat_count, has_initial_rest=True)

        cbn = sox.Combiner()
        cbn.build([track_a_file, track_b_file], output_file_name, 'mix-power')
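
Per the SoX manual, the 'mix-power' combine type scales each input by roughly 1/sqrt(n) so the result keeps the perceived power of the sources. The final step of the method in isolation, assuming the two track files already exist:

import sox

cbn = sox.Combiner()
cbn.build(['track-a.wav', 'track-b.wav'], 'checker.wav', 'mix-power')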
Example #4
def find_music(audio_file):
    modelName = "pyAA/data/svmSM"

    [Fs, x] = aIO.readAudioFile(audio_file)
    duration = x.shape[0] / float(Fs)
    t1 = time.perf_counter()  # time.clock() was removed in Python 3.8
    flagsInd, classNames, acc, CMt = aS.mtFileClassification(
        audio_file, modelName, "svm", False, '')
    [
        Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
        computeBEAT
    ] = aT.loadSVModel(modelName)
    t2 = time.perf_counter()
    perTime1 = duration / (t2 - t1)
    flags = [classNames[int(f)] for f in flagsInd]
    (segs, classes) = aS.flags2segs(flags, mtStep)

    i = 0  # len(classes)-1
    file_parts = []

    cbn = sox.Combiner()  # unused in this excerpt
    if len(classes) > 1:
        for c in classes:
            if c == 'music':
                start = segs[i][0]
                if i != 0:
                    start -= 0.5
                end = segs[i][1]
                if i != len(classes) - 1:
                    end += 2.5

                file_parts.append((int(start * 1000), int(end * 1000)))
            i += 1

    return file_parts
Example #5
def convert_audios(settings, path, pathto, vadfactor=[], trimmfactor=[]):
    try:
        if len(path) > 1:
            tools.printer(2, 'combining', '')
            cbn = sox.Combiner()
            if settings[0] == 'mp3':
                cbn.set_output_format(file_type=settings[0], rate=settings[1], channels=settings[2])
            else:
                cbn.set_output_format(file_type=settings[0], rate=settings[1], channels=settings[2], bits=settings[3], encoding=settings[4])
            cbn.convert()
            cbn.build(path, pathto, 'concatenate')
        else:
            tfm = sox.Transformer()

            if settings[0] == 'mp3':
                tfm.set_output_format(file_type=settings[0], rate=settings[1], channels=settings[2])
            else:
                tfm.set_output_format(file_type=settings[0], rate=settings[1], channels=settings[2], bits=settings[3], encoding=settings[4])

            # optional hard trim, then voice-activity trimming at both ends
            if len(trimmfactor) > 1:
                tfm.trim(trimmfactor[0], trimmfactor[1])
            if len(vadfactor) > 1:
                tfm.vad(initial_pad=vadfactor[0])
                tfm.vad(location=-1, initial_pad=vadfactor[1])

            tfm.convert()
            tfm.build(path[0], pathto)
        return True
    except Exception:
        return False
Example #6
def generate_full_mp3(solo_parts: List[Part]) -> None:
    combiner = sox.Combiner()

    # docs https://pysox.readthedocs.io/en/latest/api.html
    input_files = [part.mp3_filepath() for part in solo_parts]
    output_file_path = "{}/all.mp3".format(args.output)
    combiner.build(input_files, output_file_path, 'mix-power')
Example #7
def compose(input_files,
            output_path,
            samplerate=22000,
            n_channels=2,
            file_extension=".mp3"):
    cbn = sox.Combiner()
    cbn.convert(samplerate=samplerate, n_channels=n_channels)
    cbn.build(input_files, output_path, 'concatenate')
    return output_path
Example #8
    def addToPlaylist(self, f):
        # split each song into a faded-in body plus a 17 s tail rendered in
        # three pieces with progressively reduced bass, then re-concatenate
        self.totalSong = len(f[0])
        for i in range(len(f[0])):
            tfm1 = sox.Transformer()
            sox_duration = sox.file_info.duration(f[0][i])
            tfm1.fade(fade_in_len=5)
            tfm1.trim(0, sox_duration - 17)
            tfm1.build_file(f[0][i], 'C:\\Users\\Fadli\\Downloads\\Part1.wav')

            tfm2 = sox.Transformer()
            tfm2.trim(sox_duration - 17, sox_duration - 16.5)
            tfm2.bass(-5)
            tfm2.build_file(f[0][i], 'C:\\Users\\Fadli\\Downloads\\Part2.wav')

            tfm3 = sox.Transformer()
            tfm3.trim(sox_duration - 16.5, sox_duration - 16)
            tfm3.bass(-15)
            tfm3.build_file(f[0][i], 'C:\\Users\\Fadli\\Downloads\\Part3.wav')

            tfm4 = sox.Transformer()
            tfm4.trim(sox_duration - 16, sox_duration)
            tfm4.fade(fade_out_len=16)
            tfm4.bass(-35)
            tfm4.build_file(f[0][i], 'C:\\Users\\Fadli\\Downloads\\Part4.wav')

            sox_output, _ = f[0][i].rsplit('.', 1)
            sox_output += '.wav'

            cbn = sox.Combiner()
            cbn.build([
                'C:\\Users\\Fadli\\Downloads\\Part1.wav',
                'C:\\Users\\Fadli\\Downloads\\Part2.wav',
                'C:\\Users\\Fadli\\Downloads\\Part3.wav',
                'C:\\Users\\Fadli\\Downloads\\Part4.wav'
            ], sox_output, 'concatenate')

            tempSongname = QUrl.fromLocalFile(f[0][i])
            tempSongname, _ = tempSongname.fileName().rsplit('.', 1)
            proc = madmom.features.beats.DBNBeatTrackingProcessor(fps=100)
            act = madmom.features.beats.RNNBeatProcessor(
                online=True, nn_files=[BEATS_LSTM[0]])(sox_output)
            beatTimes = proc(act)
            beatAvg = 0
            for j in range(len(beatTimes) - 1):
                beatAvg += 60 / (beatTimes[j + 1] - beatTimes[j])
            tempo = round(beatAvg / len(beatTimes))
            self.ui.songList.insertItem(self.index, tempSongname)
            self.ui.songList.setCurrentRow(0)
            self.ui.songList2.insertItem(self.index, tempSongname)
            self.ui.songList2.setCurrentRow(0)
            self.playlist[self.index].append(sox_output)
            self.playlist[self.index].append(tempo)
            self.playlist[self.index].append(beatTimes)
            self.playlist.append([])
            print(self.playlist)
            self.index += 1
            self.totalSong -= 1
Example #9
    def get_array(self) -> AudioArray:
        combiner = sox.Combiner()
        input_files = [input.get_temp_file() for input in self.__inputs]
        combiner.build(input_filepath_list=input_files,
                       output_filepath=self._temp_filepath,
                       combine_type=self.__combine_type)
        return AudioArray(array=sox.Transformer().build_array(
            input_filepath=self._temp_filepath),
                          sample_rate=sox.file_info.sample_rate(
                              input_filepath=self._temp_filepath))
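
The same read-back step in isolation; a minimal sketch assuming pysox >= 1.4 (where Transformer.build_array is available) and an existing mixed.wav:

import sox

arr = sox.Transformer().build_array(input_filepath='mixed.wav')
sr = sox.file_info.sample_rate('mixed.wav')
print(arr.shape, sr)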
Example #10
def write_chords(paths, annotations, write=False, strategy='powers'):
    """
    Building chords: triads, septachords.

    notes -> chords
    """

    print('Generating chords of class "{}"'.format(strategy))

    with open(paths.get('strategies').get(strategy), 'r') as fh:
        strategy_config = yaml.safe_load(fh)
    annotations_df = pd.read_csv(annotations, index_col=0)
    annotations_df['pitch'] = pd.to_numeric(annotations_df['pitch'])
    records = annotations_df.set_index(['guitarModel', 'pitch'])['audioFileName'].to_dict()
    directories = []

    file_ticker = 0

    history = {}

    for ii in annotations_df[['guitarModel', 'pitch', 'audioFileName']].itertuples():

        pitch = int(ii.pitch)
        model = ii.guitarModel
        if model not in history:
            history[model] = 0

        audioname = os.path.splitext(ii.audioFileName.split('/')[-1])[0]  # drop the .wav extension
        subdirectory = os.path.join(paths.get('interim').trace, strategy, model, '')

        if model not in directories:
            os.makedirs(subdirectory, exist_ok=True)
            directories.append(model)

        for chord, segment in strategy_config.items():
            for s, pitch_components in segment.items():
                [*bindings], [*components] = zip(*[(records.get((model, pitch + x)), str(pitch + x)) for x in pitch_components])
                if not any(fn is None for fn in bindings):
                    history[model] += 1
                    rename = '_'.join([audioname, chord, s] + components) + '.wav'
                    rename = os.path.join(subdirectory, rename)
                    if write is True:
                        combiner = sox.Combiner()
                        # BINDINGS =  OTHER TRACKS MIXING
                        combiner.build(bindings, rename, 'mix')
                        file_ticker += 1
                    else:
                        pass
                else:
                    break

    print('----- {} GENERATED BY MODEL -----'.format(strategy.upper()))
    pprint(history)
Example #11
def mix_mono(outputname, *inputname) -> None:
    outputname = "{0}.wav".format(outputname)
    inputname = tuple("{0}.wav".format(n) for n in inputname)

    size = len(inputname)
    if size > 1:
        cbn = sox.Combiner()
        cbn.build(list(inputname), outputname, "merge")
    elif size == 1:
        shutil.copyfile(inputname[0], outputname)
    else:
        with open(outputname, "w") as f:
            f.write("")
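
Note that the 'merge' combine type stacks the inputs as separate channels rather than summing them, so the output of mix_mono carries one channel per input file. A minimal call, assuming mono files a.wav and b.wav exist:

mix_mono('out', 'a', 'b')  # writes out.wav with two channels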
Example #12
    def phase(self,
              output_file_name=None,
              n_tracks=9,
              gap=.03,
              repeat_count=20,
              end_align=False):

        if output_file_name is None:
            output_file_name = self.output_file_name
        track_file_names = []
        for i in range(1, n_tracks + 1):
            track_file_name = self.temp_folder + 'track-{}.wav'.format(i)
            track_file_names.append(track_file_name)

            mute_first = False
            if not end_align and i != 1:
                mute_first = True

            mute_last = False
            if end_align and i != n_tracks:
                mute_last = True

            self.checker_track(track_file_name,
                               gap=gap * i,
                               repeat_count=repeat_count,
                               mute_first=mute_first,
                               mute_last=mute_last)

        if end_align:
            track_durations = [
                sox.file_info.duration(f) for f in track_file_names
            ]
            longest_track_duration = max(track_durations)
            track_duration_diffs = [
                longest_track_duration - d for d in track_durations
            ]
            new_track_file_names = []
            for i, diff, track_file_name in zip(range(1, n_tracks + 1),
                                                track_duration_diffs,
                                                track_file_names):
                new_track_file_name = track_file_name[:-4] + '-start-offset.wav'
                new_track_file_names.append(new_track_file_name)
                tfm = sox.Transformer()
                tfm.pad(start_duration=diff + (gap * i))
                tfm.build(track_file_name, new_track_file_name)
            track_file_names = new_track_file_names

        cbn = sox.Combiner()
        cbn.silence(location=1)  # Remove silence from the beginning
        cbn.silence(location=-1)  # Remove silence from the end
        cbn.build(track_file_names, output_file_name, 'mix-power')
Example #13
def before_request() -> None:
    g.request_name = request.path
    g.params = '()'
    g.error = False
    g.path_to_files = []
    g.transformer = sox.Transformer()
    g.combiner = sox.Combiner()
    total_size = 0
    try:
        def _dir_size(directory):
            # skip entries that disappear between listdir and getsize
            return sum(os.path.getsize(directory + file)
                       for file in os.listdir(directory)
                       if os.path.exists(directory + file))
        total_size = _dir_size(INPUT_DIRECTORY) + _dir_size(OUTPUT_DIRECTORY)
    except Exception as e:
        print(e)
    g.access = total_size < STORAGE_LIMIT
Example #14
def mix_multitrack(mtrack,
                   output_path,
                   stem_indices=None,
                   alternate_weights=None,
                   alternate_files=None,
                   additional_files=None):
    """Mix the stems of a multitrack to create a new mix.
    Can optionally adjust the volume of stems and replace, remove, or add
    stems.

    Parameters
    ----------
    mtrack : Multitrack
        Multitrack object
    output_path : str
        Path to save output file.
    stem_indices : list or None, default=None
        stem indices to include in mix.
        If None, mixes all stems
    alternate_weights : dict or None, default=None
        Dictionary with stem indices as keys and mixing coefficients as values.
        Stem indices present that are not in this dictionary will use the
        default estimated mixing coefficient.
    alternate_files : dict or None, default=None
        Dictionary with stem indices as keys and filepaths as values.
        Audio file to use in place of original stem. Stem indices present that
        are not in this dictionary will use the original stems.
    additional_files : list of tuple or None, default=None
        List of tuples of (filepath, mixing_coefficient) pairs to additionally
        add to final mix.

    Returns
    -------
    filepaths : list
        List of filepaths used in the mix
    weights : list
        List of weights used to mix filepaths

    """
    filepaths, weights = _build_mix_args(mtrack, stem_indices,
                                         alternate_weights, alternate_files,
                                         additional_files)

    if len(filepaths) == 1:
        shutil.copyfile(filepaths[0], output_path)
    else:
        cbn = sox.Combiner()
        cbn.build(filepaths, output_path, 'mix', input_volumes=weights)

    return filepaths, weights
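
A usage sketch based on the docstring; the Multitrack instance and stem indices below are hypothetical placeholders:

mtrack = Multitrack('ArtistName_TrackName')  # hypothetical Multitrack object
filepaths, weights = mix_multitrack(
    mtrack, 'remix.wav',
    stem_indices=[0, 1, 3],      # mix only these stems
    alternate_weights={1: 0.5},  # attenuate stem 1
)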
Example #15
def get_integrated_lufs(filepath, min_duration=0.5):
    """
    Returns the integrated LUFS for an audiofile.

    For files shorter than 400 ms ffmpeg returns a constant integrated LUFS
    value of -70.0. To avoid this, files shorter than min_duration (by default
    500 ms) are self-concatenated until min_duration is reached and the
    LUFS value is computed for the concatenated file.

    Parameters
    ----------
    filepath : str
        Path to audio file for computing LUFS
    min_duration : float
        Minimum required duration for computing LUFS value. Files shorter than
        this are self-concatenated until their duration reaches this value
        for the purpose of computing the integrated LUFS. Caution: if you set
        min_duration < 0.4, a constant LUFS value of -70.0 will be returned for
        all files shorter than 400 ms.

    Returns
    -------
    lufs : float
        The integrated LUFS of the file (the 'I' value reported by r128stats).

    """
    try:
        duration = sox.file_info.duration(filepath)
    except Exception as e:
        raise ScaperError(
            'Unable to obtain LUFS for {:s}, error message:\n{:s}'.format(
                filepath, str(e)))

    if duration < min_duration:
        # compute how many concatenations we require
        n_tiles = int(np.ceil(min_duration / duration))

        # Concatenate audio to itself, save to temp file and get LUFS
        tmpfiles = []
        with _close_temp_files(tmpfiles):
            concat_file = tempfile.NamedTemporaryFile(suffix='.wav',
                                                      delete=False)
            tmpfiles.append(concat_file)

            cbn = sox.Combiner()
            cbn.build([filepath] * n_tiles, concat_file.name, 'concatenate')

            loudness_stats = r128stats(concat_file.name)
    else:
        loudness_stats = r128stats(filepath)

    return loudness_stats['I']
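
The tiling count follows directly from the durations. For example, a 0.2 s file with the default min_duration of 0.5 s is concatenated three times:

import numpy as np

n_tiles = int(np.ceil(0.5 / 0.2))  # -> 3, i.e. 0.6 s of audio for the measurement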
Example #16
# Mix two utterances at the given target-to-interferer ratio (TIR), trim to
# the shorter file, then force-align the mixture with mfa_align.
def align_mixed(file1, file2, tir):
	f1name = file1.split('/')[-1]
	f2name = file2.split('/')[-1]
	f1speaker = file1.split('/')[-2]
	f2speaker = file2.split('/')[-2]
	mix_fname = os.path.join(bin_path, 'mixed', f1speaker + '_' + f2speaker + '_' + str(tir), f1name[:-4] + '_' + f2name)

	tfn = sox.Transformer()
	tfn.silence(location=-1)

	cbn = sox.Combiner()
	cbn.set_input_format(file_type=['wav', 'wav'])

	len1 = float(tfn.stat(file1)['Length (seconds)'])
	len2 = float(tfn.stat(file2)['Length (seconds)'])

	rms1 = sox.file_info.stat(file1)['RMS     amplitude']
	rms2 = sox.file_info.stat(file2)['RMS     amplitude']
	factor = tir_factor(tir, rms1, rms2)

	os.makedirs(os.path.join(bin_path, 'mixed', f1speaker + '_' + f2speaker + '_' + str(tir)), exist_ok=True)

	if len1 < len2:		
		tfn.trim(0, len1)
		tfn.build(file2, os.path.join(bin_path, 'mixed', f2name))
		cbn.build([file1, os.path.join(bin_path, 'mixed', f2name)], mix_fname, 
								'mix', [1, 1 / factor])
		shutil.copy(file1[:-4] + '.PHN', mix_fname[:-4] + '.PHN')
		os.remove(os.path.join(bin_path, 'mixed', f2name))

	else:
		tfn.trim(0, len2)
		tfn.build(file1, os.path.join(bin_path, 'mixed', f1name))
		cbn.build([os.path.join(bin_path, 'mixed', f1name), file2], mix_fname, 
								'mix', [1, 1 / factor])
		shutil.copy(file2[:-4] + '.PHN', mix_fname[:-4] + '.PHN')
		os.remove(os.path.join(bin_path, 'mixed', f1name))

	
	generate_dict(mix_fname[:-4] + '.PHN')

	with open(mix_fname[:-4] + '.lab', 'w+') as f:
		f.write('WORD')

	os.makedirs(os.path.join(bin_path, 'mixed', 'aligned'), exist_ok=True)
	subprocess.run([os.path.join('./', bin_path, 'mfa_align'), os.path.join(bin_path, 'mixed', f1speaker + '_' + f2speaker + '_' + str(tir)), 
										 mix_fname[:-4] + '.dict', 'english', os.path.join(bin_path, 'mixed', 'aligned', f1speaker + '_' + f2speaker + '_' + str(tir))])
Example #17
def mix_phonemes(phn1, phn2):
  phn1_list = [os.path.join(test_set, phn1, file) for file in os.listdir(os.path.join(test_set, phn1))]
  phn2_list = [os.path.join(test_set, phn2, file) for file in os.listdir(os.path.join(test_set, phn2))]

  file1 = phn1_list[random.randint(0, phn_occurrence[phn1] - 1)]
  file2 = phn2_list[random.randint(0, phn_occurrence[phn2] - 1)]

  rms1 = sox.file_info.stat(file1)['RMS     amplitude']
  rms2 = sox.file_info.stat(file2)['RMS     amplitude']
  factor = tir_factor(0, rms1, rms2)

  cbn = sox.Combiner()
  cbn.set_input_format(file_type=['wav', 'wav'])
  cbn.build([file1, file2], 'test/new.wav', 'mix', [1, 1 / factor])

  pred = test_mixed('test/new.wav')
  os.remove('test/new.wav')
  return pred
Example #18
def Deep():
    try:
        if tkMessageBox.askyesno("Confirmation", "Would you like to proceed?"):

            BEAM_WIDTH = 500
            LM_WEIGHT = 1.75
            WORD_COUNT_WEIGHT = 1.00
            VALID_WORD_COUNT_WEIGHT = 1.00
            N_FEATURES = 26
            N_CONTEXT = 9

            ds = Model('models/models.pb', N_FEATURES, N_CONTEXT,
                       'models/alphabet.txt', BEAM_WIDTH)

            fs, audio = wav.read(audiofile.get())

            if fs != 16000:
                # resample to 16 kHz mono and read the converted file back
                cbn = sox.Combiner()
                cbn.convert(samplerate=16000, n_channels=1)
                converted_path = 'converted.wav'
                cbn.build([str(audiofile.get())], converted_path, 'concatenate')
                fs, audio = wav.read(converted_path)

            audio_length = len(audio) * (1 / 16000)

            resultpage = Toplevel(parent)
            resultpage.title("Result")
            result_border = ttk.Frame(resultpage, padding=(12, 12, 12, 12))
            result_border.pack()
            result_page = Frame(result_border, bg="white")
            result_page.pack()

            Tkinter.Label(result_page,
                          text="What I've heard from you:",
                          font=14,
                          bg="white").grid(row=1, column=1, sticky=E)
            Tkinter.Label(result_page, textvariable=word, font=12,
                          bg="white").grid(row=2, column=2, sticky=E)

            word.set(ds.stt(audio, fs))

    except ValueError:
        tkMessageBox.showerror("Error!", "Only 16000Hz WAV files supported!")
    except IOError:
        tkMessageBox.showerror("Error!", "No file uploaded!")
Example #19
def combine_audio_files(params):

    cmb = sox.Combiner()
    cmb.convert(samplerate=22050)
    cmb.build(
        [
            os.path.join(params['audio_folder'], params['filenames'][0]),
            os.path.join(params['audio_folder'], params['filenames'][1]),
            os.path.join(params['audio_folder'], params['filenames'][2]),
            os.path.join(params['audio_folder'], params['filenames'][3])
        ],
        os.path.join(config.audio_save_folder, params['output_fname']), 'mix')  # , 'mix', input_volumes=[0.6, 0.3, 0.3, 0.3])

    # if the reverb option is active, this creates the reverb audio files using an IR from Isophonics
    if params['reverb']:
        y_ir, sr_ir = librosa.load('./ir/IR_greathall.wav', sr=params['sr'])
        y_sig, sr_sig = librosa.load(os.path.join(config.audio_save_folder, params['output_fname']), sr=params['sr'])
        y_rev = scipy.signal.convolve(y_sig, y_ir, mode="full")
        soundfile.write(os.path.join(config.audio_save_folder, 'reverb', params['output_fname']), y_rev, samplerate=params['sr'])
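
In "full" mode the convolution has len(y_sig) + len(y_ir) - 1 samples, so the written file keeps the reverb tail that rings out past the dry signal. A quick check:

import numpy as np
import scipy.signal

assert len(scipy.signal.convolve(np.ones(4), np.ones(3), mode="full")) == 4 + 3 - 1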
Example #20
def _mpls_audio(mplsdict, nocleanup, silent):
    cliplist = mplsdict['clip']
    infiles = []
    for clip in cliplist:
        if clip:
            infile = str(clip, 'utf-8')
            infile = os.path.normpath(infile)
            infiles.append(infile)

    out_path_prefix = os.path.splitext(str(cliplist[0], 'utf-8'))[0]
    outfiles = []
    concat_files = []
    if len(infiles) > 1:
        try:
            import sox
        except ModuleNotFoundError:
            raise ModuleNotFoundError(
                'AudioProcessor.VideoSource: missing sox dependency for concatenating.'
            )
        for infile in infiles:
            extracted_tracks, framerate, framenum = _extract_tracks_as_wav(
                infile, silent)
            concat_files.append(extracted_tracks)
        for i in range(len(concat_files[0])):
            combine_files = [
                concat_files[j][i] for j in range(len(concat_files))
            ]
            cbn = sox.Combiner()
            if silent:
                cbn.set_globals(verbosity=0)
            formats = ['wav' for _ in combine_files]  # one format per combined input
            cbn.set_input_format(file_type=formats)
            outfile = f"{out_path_prefix}_{i+2}_concat.wav"
            outfiles.append(outfile)
            cbn.build(combine_files, outfile, 'concatenate')
        if not nocleanup:
            for item in concat_files:
                _cleanup_temp_files(item)
        return outfiles, framerate, framenum
    else:
        return infiles, None, None
Example #21
def phonemes_to_audio(syllables: List[Tuple[str, str]],
                      voice_features: Dict,
                      name: str = 'tom_nook') -> None:
    """Convert phonemes to audio."""
    phonemes = []
    for initial, final in syllables:
        if initial:
            phonemes.append(initial)
        if final:
            phonemes.append(final)

    phoneme_sounds = [f'pinyin_phonemes/{phoneme}.wav' for phoneme in phonemes]

    synthesized = sox.Combiner()
    vf = voice_features[name]
    pitch_shift_random_range, tempo = vf['pitch_shift_random_range'], vf[
        'tempo']
    synthesized.pitch(random.uniform(*pitch_shift_random_range))
    synthesized.tempo(tempo)
    synthesized.build(phoneme_sounds, 'synthesized.wav', 'concatenate')
Example #22
def maybe_convert_one_to_wav(entry):
    root, _, files = entry
    transformer = sox.Transformer()
    transformer.convert(samplerate=SAMPLE_RATE, n_channels=CHANNELS)
    combiner = sox.Combiner()
    combiner.convert(samplerate=SAMPLE_RATE, n_channels=CHANNELS)
    output_wav = os.path.join(root, WAV_NAME)
    if os.path.isfile(output_wav):
        return
    files = sorted(glob(os.path.join(root, AUDIO_PATTERN)))
    try:
        if len(files) == 1:
            transformer.build(files[0], output_wav)
        elif len(files) > 1:
            wav_files = []
            for i, file in enumerate(files):
                wav_path = os.path.join(root, "audio{}.wav".format(i))
                transformer.build(file, wav_path)
                wav_files.append(wav_path)
            combiner.set_input_format(file_type=["wav"] * len(wav_files))
            combiner.build(wav_files, output_wav, "concatenate")
    except sox.core.SoxError:
        return
Example #23
def generate_accompaniment(own_part, solo_parts) -> None:
    combiner = sox.Combiner()

    accompaniment_volume_ratio = 0.33
    instrumental_volume_ratio = accompaniment_volume_ratio * args.instrumental_volume
    input_volumes = []
    input_files = []
    for part in solo_parts:
        is_own_part = part.name == own_part.name
        is_instrumental = part.name == 'accompaniment'

        input_files.append(part.mp3_filepath())

        if is_own_part:
            input_volumes.append(1.0)
        elif is_instrumental:
            input_volumes.append(instrumental_volume_ratio)
        else:
            input_volumes.append(accompaniment_volume_ratio)

    output_file_path = "{}/{} with accompaniment.mp3".format(
        args.output, own_part.name)
    combiner.build(input_files, output_file_path, 'mix-power', input_volumes)
Example #24
def gen_audio_file(trs, langin, langout, pattern, fileout):

    # create a unique temp working directory
    temp = '/tmp/mvoad-' + str(time.time())
    os.mkdir(temp)

    tmpfn = temp + '/mvoad.tmp.wav'
    tmpold = temp + '/mvoad.tmp.dest1.mp3'
    tmpdest = temp + '/mvoad.tmp.dest.mp3'

    for tr_ix, tr in enumerate(trs):
        untrsname = temp + '/mvoad.src.word.mp3'
        trsname = temp + '/mvoad.dest.word.mp3'
        Speech(tr.origin, langin).save(untrsname)
        Speech(tr.text, langout).save(trsname)
        for pat_ix, item in enumerate(pattern):
            # swap the rolling output names so the previous result can be
            # concatenated with the newly rendered word
            tmpdest, tmpold = tmpold, tmpdest
            if item[0]:
                filename = trsname
            else:
                filename = untrsname
            t = sox.Transformer()
            t.tempo(item[1], 's')
            t.pad(0, item[2])
            # everything after the very first word is appended to the running file
            if pat_ix != 0 or tr_ix != 0:
                t.build(filename, tmpfn)
                cbn = sox.Combiner()
                cbn.build([tmpdest, tmpfn], tmpold, 'concatenate')
            else:
                t.build(filename, tmpold)
    shutil.move(tmpold, fileout)
    shutil.rmtree(temp)
Example #25
import os
import shutil
import glob
import sox

os.mkdir('making_temp')

# pad every clip with 3 seconds of trailing silence
tfm = sox.Transformer()
tfm.pad(0.0, 3.0)

files = glob.glob('./jllepd/b*.mp3')

for file in files:
    tfm.build(file, './making_temp/' + os.path.basename(file))

# concatenate the padded clips into one 44.1 kHz stereo file
files = sorted(glob.glob('./making_temp/*.*'))
cbn = sox.Combiner()
cbn.convert(samplerate=44100, n_channels=2)
cbn.build(files, './Bb.mp3', 'concatenate')

shutil.rmtree('making_temp')
Example #26
def alignment_helper(file_list, target_path):
    """Downsample and perform cross-correlation on files relative
    to a target file to test if they are correctly aligned.

    Parameters
    ----------
    file_list : list
        List of files (i.e. stem_files, raw_files)
    target_path : str
        Filepath to compare files in file_list to.

    Returns
    -------
    status : bool
        True if the cross_correlation values are within a threshold, demonstrating
        that the files are correctly aligned.
    """
    sr = 1000
    output_handle = tempfile.NamedTemporaryFile(suffix='.wav')
    output_path = output_handle.name

    if len(file_list) > 1:
        file_sum = sox.Combiner()
        file_sum.rate(sr, 'm')  # effects must be set before build to take effect
        file_sum.build(file_list, output_path, 'mix')
    else:
        file_sum = sox.Transformer()
        file_sum.rate(sr, 'm')
        file_sum.build(file_list[0], output_path)

    target_handle = tempfile.NamedTemporaryFile(suffix='.wav')
    target_handle_path = target_handle.name
    target_sum = sox.Transformer()
    target_sum.rate(sr, 'm')
    target_sum.build(target_path, target_handle_path)

    dur = get_length(target_path)
    offset = (dur / 44100.0) / 2.0
    y_files, sr = librosa.load(output_path,
                               sr=sr,
                               offset=offset,
                               duration=30.0)
    y_target, sr = librosa.load(target_handle_path,
                                sr=sr,
                                offset=offset,
                                duration=30.0)

    correlation = np.correlate(y_files, y_target, 'full')

    N = len(y_target)
    a = np.arange(1, N + 1)
    a_rev = np.arange(1, N)
    b = a_rev[::-1]
    c = np.concatenate((a, b))
    c = c.astype(float)

    correlation = np.abs(correlation) / c
    center = N
    corr_index = np.argmax(correlation)

    # aligned if the correlation peak is within 5 samples of the center
    return bool(np.abs(corr_index - center) <= 5)
Example #27
def generate_stream_and_labels(
    dest_dir,
    wav_intermediates,
    wav_data,
    target_word,
    target_lang,
    cv_clipsdir,
):
    ###############################################
    ## GENERATE LONG WAVFILE AND LABELS
    ###############################################
    # step 1: convert to wavs (to avoid slop in mp3 timings)
    # food for thought: sox mp3s may have a gap when concatenating
    # https://stackoverflow.com/questions/25280958/sox-concatenate-multiple-audio-files-without-a-gap-in-between

    assert os.path.isdir(cv_clipsdir), "cv data not found"

    assert os.path.isdir(dest_dir), "no dest dir available"
    assert os.path.isdir(
        wav_intermediates), "no destination intermediate wav dir available"
    assert os.listdir(
        wav_intermediates) == [], "intermediate wav dir not empty"

    label_file = dest_dir / "streaming_labels.txt"
    wav_stream_file = str(dest_dir / "streaming_test.wav")

    assert not os.path.isfile(label_file), "label file exists already"
    assert not os.path.isfile(wav_stream_file), "wav stream exists already"

    wavs = []
    total_duration_mp3s_s = 0
    for ix, stream_component in enumerate(wav_data):
        mp3name_no_ext = stream_component["mp3name_no_ext"]

        if ix % 250 == 0:
            print("mp3 to wav", ix)
        mp3path = cv_clipsdir / (mp3name_no_ext + ".mp3")
        if not os.path.exists(mp3path):
            raise ValueError("could not find", mp3path)

        duration_s = sox.file_info.duration(mp3path)
        total_duration_mp3s_s += duration_s

        wav = str(wav_intermediates / (mp3name_no_ext + ".wav"))
        transformer = sox.Transformer()
        transformer.convert(samplerate=16000)  # from 48K mp3s

        transformer.build(str(mp3path), wav)
        wavs.append(wav)

    print(total_duration_mp3s_s, "sec = ", total_duration_mp3s_s / 60, "min")
    print(len(wavs))

    # step 2: how long is the sum of each wav according to sox?
    total_duration_wavs_s = 0
    for w in wavs:
        duration_s = sox.file_info.duration(w)
        total_duration_wavs_s += duration_s
    print(
        "individual wavs:",
        total_duration_wavs_s,
        "sec = ",
        total_duration_wavs_s / 60,
        "min",
    )

    # step 3: combine the wavs. godspeed.
    combiner = sox.Combiner()
    combiner.convert(samplerate=16000, n_channels=1)
    # https://github.com/rabitt/pysox/blob/master/sox/combine.py#L46
    combiner.build(wavs, wav_stream_file, "concatenate")

    # step 4: how long is the total wavfile? should be the sum of the individual wavs
    duration_s = sox.file_info.duration(wav_stream_file)
    print("concatenated wav:", duration_s, "sec = ", duration_s / 60, "min")

    # step 5: generate labels using the wav file durations, not the sloppy mp3 file durations

    target_times_s = []
    current_sentence_start_s = 0
    for ix, stream_component in enumerate(wav_data):
        mp3name_no_ext = stream_component["mp3name_no_ext"]
        wavpath = wav_intermediates / (mp3name_no_ext + ".wav")
        sentence_duration_s = sox.file_info.duration(wavpath)
        if not stream_component["is_target"]:
            # add full duration of non-target sentence to current offset
            current_sentence_start_s += sentence_duration_s
            continue
        start_s = stream_component["start_s"]
        end_s = stream_component["end_s"]
        target_utterance_start_s = current_sentence_start_s + float(start_s)
        target_utterance_end_s = current_sentence_start_s + float(end_s)
        target_times_s.append(
            (target_utterance_start_s, target_utterance_end_s))
        current_sentence_start_s += sentence_duration_s

    # step 6: write labels out
    # the label timings should indicate the start of each target utterance in ms
    with open(label_file, "w") as fh:
        for start_s, _ in target_times_s:
            start_ms = start_s * 1000
            fh.write(f"{target_word}, {start_ms}\n")
    return target_times_s
Example #28
import numpy as np
import tensorflow as tf
from data.train_data import process_save_wav_sample
from data.train_data import process_save_wav_sample_x
from data.train_data import AudPreEmphasize
from data.baidu_emotion import get_wav_emotion
import wave
import os
import scipy.io.wavfile as wav
from data.example import myvad
import socket
import sox
import threading
import time
cbn = sox.Combiner()  # combiner for merging audio files
#myser.get_emotion(test.wav)

s = socket.socket()  #Create a socket object
bsize = s.getsockopt(socket.SOL_SOCKET, socket.SO_SNDBUF)
print(bsize)
host = socket.gethostname()  #Get the local machine name
port = 12397  # Reserve a port for your service
s.bind(("", port))  #Bind to the port
s.listen(5)  #Wait for the client connection
data = []
'''
Results

[[0.02977182 0.70166206 0.23576824 0.03279793]]
[[2.7817843e-02 8.8926041e-01 8.2342632e-02 5.7907921e-04]]
Example #29
        wav=target_wav,
        transcript=target_transcript,
        duration_s=durations_s[0],
    )
    nw_info = dict(
        ix=2 * ix + 1,
        wav=non_targets[nontarget_ix][0],
        transcript=non_targets[nontarget_ix][1],
        duration_s=durations_s[1],
    )
    stream_info.extend([tw_info, nw_info])

assert len(stream_wavs) == n_stream_wavs * 2, "not enough stream data"
stream_wavfile = str(workdir / "covid_stream.wav")

combiner = sox.Combiner()
combiner.convert(samplerate=16000, n_channels=1)
# https://github.com/rabitt/pysox/blob/master/sox/combine.py#L46
combiner.build(stream_wavs, stream_wavfile, "concatenate")

dur_info = sum([d["duration_s"] for d in stream_info])
print(sox.file_info.duration(stream_wavfile), "seconds in length", dur_info)

with open(stream_info_file, "wb") as fh:
    pickle.dump(stream_info, fh)

# %%

# load embedding model
traindir = Path(f"/home/mark/tinyspeech_harvard/multilang_embedding")
Example #30
def process(parameters):
    tid, n_samples = parameters
    output_list = output_dir + "dev-other.{}.lst".format(tid)

    with open(output_list, "w") as fout:
        for i in range(tid * n_samples, min(len(lines),
                                            n_samples * (tid + 1))):
            line = lines[i]
            sp = line.split("\t")
            filename = sp[0]
            # print(filename)
            # duration = sox.file_info.duration(filename)

            alignments = sp[1].strip().split("\\n")

            # Parse the alignments
            chunk_starts = [0]
            chunk_ends = []
            words = []

            cur_words = []
            cur_end = 0
            # parse each alignment entry; avoid reusing the outer loop's i
            for align_ix, alignment in enumerate(alignments):
                sp = alignment.split()
                begin = float(sp[2])
                length = float(sp[3])
                word = sp[4]

                cur_end = begin + length

                if align_ix == 0:
                    continue

                if word == "$":
                    # "$" marks silence; a long enough silence closes the chunk
                    if length > MIN_SIL_LENGTH:
                        chunk_ends.append(cur_end - TOLERANCE)
                        chunk_starts.append(cur_end - TOLERANCE)
                        words.append(" ".join(cur_words))
                        cur_words = []
                    continue

                cur_words.append(word)

            if len(cur_words) > 0:
                chunk_ends.append(cur_end)
                words.append(" ".join(cur_words))
            else:
                chunk_starts.pop()
            # print(duration)
            # print(chunk_starts)
            # print(chunk_ends)
            # print(words)

            # Split the audios
            order = list(range(len(chunk_starts)))
            random.shuffle(order)

            new_target = " ".join([words[i] for i in order])
            new_audio_path = output_dir + filename.split("/")[-1]
            fout.write("{}\t{}\t{}\t{}\n".format(new_audio_path,
                                                 new_audio_path,
                                                 chunk_ends[-1] * 1000,
                                                 new_target))

            if len(chunk_starts) == 1:
                os.system("cp {} {}".format(filename, output_dir))
                continue

            paths = []
            for i in order:
                sox_tfm = sox.Transformer()
                sox_tfm.set_output_format(file_type="flac",
                                          encoding="signed-integer",
                                          bits=16,
                                          rate=16000)
                sox_tfm.trim(chunk_starts[i], chunk_ends[i])
                new_path = "/tmp/{}_{}.flac".format(tid, i)
                sox_tfm.build(filename, new_path)
                paths.append(new_path)

            # Combine them
            sox_comb = sox.Combiner()
            sox_comb.build(list(paths), new_audio_path, "concatenate")