Code Example #1
import shutil

from sox import Combiner, Transformer

# TempFile (a temporary-file wrapper exposing .path and .close()) and
# segment_seconds (which maps a segment descriptor to start/end seconds)
# are project-local helpers not shown here.
def cut(input_path, output_file, metadata):
    segments = metadata['segments']
    segments = [segment_seconds(segment) for segment in segments]

    # Open a temporary file to hold audio between the two processing passes
    with TempFile('.mp3') as temp_file:
        if segments:
            # Cut audio into segments and create fade in/out
            # We need to use a new temporary file for each
            # audio segment
            temp_segments = [TempFile('.mp3') for segment in segments]
            try:
                for index, segment in enumerate(segments):
                    sox = Transformer()
                    sox.channels(1)
                    sox.norm(-24)
                    sox.trim(*segment)
                    sox.fade(1, 2, 't')
                    sox.build(input_path, temp_segments[index].path)

                if len(segments) > 1:
                    # Concatenate all the audio segments back together
                    # and output to our main temporary file
                    Combiner().build(
                        [temp_segment.path for temp_segment in temp_segments],
                        temp_file.path,
                        'concatenate',
                    )
                else:
                    # Only one segment, so just copy it to the main
                    # temporary file (shutil.copy is portable, unlike cp)
                    shutil.copy(temp_segments[0].path, temp_file.path)

            finally:
                # Cleanup temporary segment files even on error
                if temp_segments:
                    for temp_segment in temp_segments:
                        temp_segment.close()

        # Second pass: filter, compress, and EQ the audio in the
        # temporary file, writing the result to output_file
        sox = Transformer()
        sox.highpass(100)
        sox.lowpass(10000)
        sox.compand(0.005, 0.12, 6, [
            (-90, -90),
            (-70, -55),
            (-50, -35),
            (-32, -32),
            (-24, -24),
            (0, -8),
        ])
        sox.equalizer(3000, 1000, 3)
        sox.equalizer(280, 120, 3)
        sox.build(temp_file.path, output_file)
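
A hypothetical call site, assuming segment_seconds passes (start, end) second
pairs through unchanged (the real segment format is defined by that
project-local helper):

# Hypothetical usage: keep 0:00-0:30 and 0:45-1:30, then filter/compress/EQ.
metadata = {'segments': [(0.0, 30.0), (45.0, 90.0)]}
cut('raw_episode.mp3', 'episode.mp3', metadata)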
Code Example #2
from pathlib import Path
from typing import Union

import numpy as np
from sox import Transformer


# Extracted from a class: this is a classmethod, and cls.sample_rate is an
# attribute of the enclosing class (not shown here).
def preprocess_wav(cls, fpath: Union[str, Path]) -> np.ndarray:
    """Load, resample, normalize and trim a waveform."""
    transformer = Transformer()
    transformer.norm()
    transformer.silence(silence_threshold=1, min_silence_duration=0.1)
    transformer.set_output_format(rate=cls.sample_rate,
                                  bits=16,
                                  channels=1)
    wav = transformer.build_array(input_filepath=str(fpath))
    # set_output_format(bits=16) makes build_array return int16 samples;
    # dividing by 2**15 rescales them to [-1.0, 1.0)
    wav = wav / (2**15)
    return wav.astype(np.float32)
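
Since the method reads cls.sample_rate, calling it requires an enclosing
class. A minimal sketch, assuming a 16 kHz target rate (the class name and
rate are illustrative, not from the source):

class WavLoader:
    sample_rate = 16000  # assumed target rate

    preprocess_wav = classmethod(preprocess_wav)  # bind the function above

wav = WavLoader.preprocess_wav('speech.wav')  # float32 samples in [-1.0, 1.0)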
Code Example #3
File: dataset.py Project: s3prl/s3prl
import torch
from sox import Transformer


def loadFile(data, max_timestep):
    # Decode one file to mono 16-bit 16 kHz, normalize, and return it as a
    # float tensor truncated to at most max_timestep samples.
    transformer = Transformer()
    transformer.norm()
    # transformer.silence(silence_threshold=1, min_silence_duration=0.1)
    transformer.set_output_format(rate=16000, bits=16, channels=1)
    wav = transformer.build_array(input_filepath=str(data))
    wav = torch.tensor(wav / (2**15)).float()  # int16 -> [-1.0, 1.0)
    length = len(wav)
    if length > max_timestep:
        start = 0
        end = max_timestep
        length = max_timestep
        wav = wav[start:end]
    length = torch.tensor(length).long()

    return wav, length
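
A hypothetical call, capping clips at ten seconds of 16 kHz audio (the path
and cap are illustrative):

wav, length = loadFile('speech.wav', max_timestep=160000)  # 10 s at 16 kHz
print(wav.shape, length.item())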
Code Example #4
File: dataset.py Project: s3prl/s3prl
import random

import torch
from sox import Transformer


# Note: max_timestep is a module-level global in the original dataset.py.
def loadFile_thread_exec(data):
    wavs = []
    lengths = []
    for fullPath in data:
        transformer = Transformer()
        transformer.norm()
        transformer.silence(silence_threshold=1, min_silence_duration=0.1)
        transformer.set_output_format(rate=16000, bits=16, channels=1)
        wav = transformer.build_array(input_filepath=str(fullPath))
        wav = torch.tensor(wav / (2**15)).float()
        length = len(wav)
        if length > max_timestep:
            start = random.randint(0, int(length - max_timestep))
            end = start + max_timestep
            length = max_timestep
            wav = wav[start:end]
        wavs.append(wav)
        lengths.append(torch.tensor(length).long())
    return wavs, lengths
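
Consistent with the _thread_exec suffix, a sketch of driving this from a
thread pool; the chunking scheme and worker count are assumptions, and
max_timestep must already be defined at module level:

from concurrent.futures import ThreadPoolExecutor

def load_all(paths, n_workers=4):
    # Split the file list into n_workers interleaved chunks and decode them
    # in parallel; sox runs as a subprocess, so threads overlap well here.
    chunks = [paths[i::n_workers] for i in range(n_workers)]
    wavs, lengths = [], []
    with ThreadPoolExecutor(max_workers=n_workers) as pool:
        for chunk_wavs, chunk_lengths in pool.map(loadFile_thread_exec, chunks):
            wavs.extend(chunk_wavs)
            lengths.extend(chunk_lengths)
    return wavs, lengths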
Code Example #5
from os.path import join

from sox import Transformer


# out_path, processed_samples, NORMALIZE, SILENCE, PADDING and _renameSample
# are module-level names defined elsewhere in the original script.
def _processSamples(sample_list):
    for sample in sample_list:
        sample_new_name = _renameSample(sample)
        _out = join(out_path, sample_new_name)
        processed_samples.append(_out)
        _in = sample

        # Sox processing using a Transformer instance
        tfm = Transformer()
        tfm.convert(samplerate=44100, n_channels=2, bitdepth=16)

        if NORMALIZE:
            tfm.norm(db_level=-3)
        if SILENCE:
            tfm.silence(location=-1,
                        silence_threshold=0.05,
                        min_silence_duration=0.1)
        if PADDING:
            tfm.pad(0, PADDING)

        tfm.build(_in, _out)
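
The function reads several free names from its module. A minimal sketch of
that surrounding state, with placeholder values (every value below is an
assumption; only the names come from the function body):

import os.path

out_path = 'processed'            # assumed output directory
processed_samples = []            # collects output paths as a side effect
NORMALIZE = True
SILENCE = True
PADDING = 0.5                     # assumed seconds of padding to append
_renameSample = os.path.basename  # stand-in; the real helper is project-specific

_processSamples(['raw/kick.wav', 'raw/snare.wav'])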