Code Example #1
File: utils.py Project: CorticoAI/denoising_DIHARD18
def is_wav(fn):
    """Returns True if ``fn`` is a WAV file."""
    try:
        WavInfoReader(fn)
    except AttributeError:
        return False
    return True
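A minimal usage sketch for the helper above. The import is the one the snippet assumes (the wavinfo package) and the path is a placeholder:

from wavinfo import WavInfoReader

# Hypothetical call site; "mix.wav" is a placeholder path.
if is_wav("mix.wav"):
    print("mix.wav parses as a WAV file")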
Code Example #2
def teach():
    if request.method == 'POST':
        # check if the post request has the file part
        language = str(request.form.get('languages', ''))
        if 'file' not in request.files:
            return jsonify(
                {"error": "No file found in posted data", "success": "false"})
        file = request.files['file']
        if not allowed_file(file.filename):
            return jsonify(
                {"error": "File type not supported", "success": "false"})
        if not language:
            return jsonify(
                {"error": "Please enter a language", "success": "false"})
        if file.filename == '':
            return jsonify(
                {"error": "Filename cannot be empty", "success": "false"})
        if file and allowed_file(file.filename):
            filename = file.filename
            voice_file = os.path.join(app.config['UPLOAD_FOLDER'], filename)
            file.save(voice_file)
            voice_file = convertToWav(voice_file)
            info = WavInfoReader(voice_file)
            config = configure_speech(language, voice_file,
                                      info.fmt.channel_count,
                                      info.fmt.sample_rate)
            audio = get_audio_from_file(voice_file)
            response = client.recognize(request={
                "config": config,
                "audio": audio
            })
            transcript = str(
                response.results[0].alternatives[0].transcript).encode()
            os.remove(voice_file)
            return transcript
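The view above depends on helpers that are not shown here (allowed_file, convertToWav, configure_speech, get_audio_from_file) and on a configured Flask app and Google Cloud Speech client. A sketch of what the allowed_file check and the route registration might look like, assuming a standard Flask setup; the extension whitelist, upload folder, and endpoint name are guesses:

from flask import Flask

app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = '/tmp/uploads'        # assumed location
ALLOWED_EXTENSIONS = {'wav', 'mp3', 'flac', 'ogg'}  # assumed whitelist

def allowed_file(filename):
    # Accept only filenames whose extension is in the whitelist above.
    return ('.' in filename
            and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS)

# The view itself would then be registered along the lines of:
# @app.route('/teach', methods=['POST'])
# def teach(): ...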
Code Example #3
    def read_file_properties(self, filename):

        wav_file = WavInfoReader(filename)

        sample_rate = wav_file.fmt.sample_rate
        num_channels = wav_file.fmt.channel_count
        bit_rate = wav_file.fmt.bits_per_sample

        return (num_channels, sample_rate, bit_rate)
Code Example #4
    def count_samples(self):
        '''
        List the .wav files of the dataset and count the total number of samples.
        '''

        # list .wav files in directory
        self.file_names = fnmatch.filter(os.listdir(self.path_to_input), '*.wav')
        # count the number of samples contained in the dataset
        self.total_samples = 0
        for file in self.file_names:
            info = WavInfoReader(os.path.join(self.path_to_input, file))
            self.total_samples = self.total_samples + \
                int(np.fix(info.data.frame_count/self.len_of_samples))
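The arithmetic above counts how many complete windows of len_of_samples frames fit in each file; np.fix truncates toward zero, so for positive values it is equivalent to integer division. A standalone sketch of the same count, assuming len_of_samples is a window length in frames:

from wavinfo import WavInfoReader

def windows_in_file(path, len_of_samples):
    # Number of complete, non-overlapping windows that fit in the file;
    # same result as int(np.fix(frame_count / len_of_samples)) for positive inputs.
    info = WavInfoReader(path)
    return info.data.frame_count // len_of_samples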
Code Example #5
File: utils.py Project: sudaoblinnnk/DTLN-aec
def collet_wav_info(dir_path, csv_save_path):
    results = list()
    for f in os.listdir(dir_path):
        try:
            f_path = os.path.join(dir_path, f)
            wav_info = WavInfoReader(f_path)
            results.append([f,
                            wav_info.data.frame_count])
        except Exception:
            # Report files that could not be parsed as WAV and continue.
            print(f_path)

    df = pd.DataFrame(results)
    df.to_csv(csv_save_path, index=False, header=False)
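A hypothetical invocation of the helper above; the paths are placeholders, and the snippet assumes os, pandas as pd, and WavInfoReader from wavinfo are already imported:

# Write one row per readable WAV file: file name and frame count.
collet_wav_info("/data/aec/train", "/data/aec/train_wav_info.csv")

# The CSV has no header row, so name the columns when reading it back.
import pandas as pd
lengths = pd.read_csv("/data/aec/train_wav_info.csv", header=None,
                      names=["file", "frame_count"])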
Code Example #6
    def __init__(self, path: str, ffprobe_executable='/usr/local/bin/ffprobe'):
        def probe() -> dict:
            cmd = [
                ffprobe_executable, '-of', 'json', '-show_format',
                '-show_streams', path
            ]
            p = subprocess.Popen(cmd,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
            stdout, stderr = p.communicate()
            if p.returncode != 0:
                raise subprocess.CalledProcessError(
                    p.returncode, subprocess.list2cmdline(cmd), stderr)
            return json.loads(stdout.decode('utf8'))

        self.path = path
        self.info = WavInfoReader(path)
        self.probe = probe()
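The constructor above combines the WAV header parsed by WavInfoReader with a JSON report from ffprobe. A standalone sketch of the same probe, assuming ffprobe is available on PATH; the flags match the ones built in the constructor:

import json
import subprocess

def ffprobe_json(path, ffprobe_executable='ffprobe'):
    # JSON report with container-level format info and per-stream details.
    cmd = [ffprobe_executable, '-of', 'json', '-show_format',
           '-show_streams', path]
    result = subprocess.run(cmd, capture_output=True, check=True)
    return json.loads(result.stdout.decode('utf8'))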
Code Example #7
def read_metadata(path: str) -> "Metadata":
    metadata = WavInfoReader(path)

    meta = Metadata()
    meta.set_filepath(path)
    meta.set_timestring(metadata.bext.originator_time)
    meta.set_datestring(metadata.bext.originator_date)
    meta.set_codec(metadata.fmt.bits_per_sample)
    meta.set_samplerate(metadata.fmt.sample_rate)
    meta.set_channels(metadata.fmt.channel_count)
    meta.set_scene(metadata.ixml.scene)
    meta.set_take(metadata.ixml.take)
    meta.set_tape(metadata.ixml.tape)
    meta.set_speed([
        l.split("=")[1] for l in metadata.bext.description.split("\r\n")
        if l.startswith("sSPEED")
    ][0])
    meta.set_circled([
        l.split("=")[1] == "TRUE"
        for l in metadata.bext.description.split("\r\n")
        if l.startswith("sCIRCLED")
    ][0])
    meta.set_samplecount(metadata.data.frame_count)

    # Always subtract 2 from regular (non-mixdown) channel numbers to match the device channels
    index_offset = 2

    # DEBUG
    for track in metadata.ixml.track_list:
        print(track)

    # Process the regular Channels first (excluding downmixes)
    for track in metadata.ixml.track_list:
        if track.name not in ["MixL", "MixR"]:
            tracknumber = int(track.channel_index) - index_offset
        elif track.name == "MixL":
            tracknumber = 9
        elif track.name == "MixR":
            tracknumber = 10
        internal_tracknumber = int(track.interleave_index)
        meta.add_track(internal_tracknumber, tracknumber, track.name)

    return meta
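The sSPEED and sCIRCLED lookups above scan the Broadcast Wave description chunk for KEY=VALUE lines separated by CRLF. A compact sketch of the same parsing, assuming the description follows that convention:

def parse_bext_description(description):
    # Split the bext description into a dict of KEY -> VALUE pairs,
    # e.g. {"sSPEED": "...", "sCIRCLED": "TRUE", ...}.
    pairs = {}
    for line in description.split("\r\n"):
        if "=" in line:
            key, value = line.split("=", 1)
            pairs[key] = value
    return pairs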
Code Example #8
File: utils.py Project: CorticoAI/denoising_DIHARD18
def get_bitdepth(fn):
    """Return bitdepth of WAV file."""
    if not is_wav(fn):
        raise ValueError('File "%s" is not a valid WAV file.' % fn)
    hinfo = WavInfoReader(fn)
    return hinfo.fmt.bits_per_sample
Code Example #9
File: utils.py Project: CorticoAI/denoising_DIHARD18
def get_num_channels(fn):
    """Return number of channels present in  WAV file."""
    if not is_wav(fn):
        raise ValueError('File "%s" is not a valid WAV file.' % fn)
    hinfo = WavInfoReader(fn)
    return hinfo.fmt.channel_count
Code Example #10
File: utils.py Project: CorticoAI/denoising_DIHARD18
def get_sr(fn):
    """Return sample rate in Hz of WAV file."""
    if not is_wav(fn):
        raise ValueError('File "%s" is not a valid WAV file.' % fn)
    hinfo = WavInfoReader(fn)
    return hinfo.fmt.sample_rate
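A brief usage sketch chaining the three helpers above with is_wav() from Code Example #1; the path is a placeholder:

fn = "session_01.wav"  # placeholder path
if is_wav(fn):
    print("%d channels, %d Hz, %d bits per sample"
          % (get_num_channels(fn), get_sr(fn), get_bitdepth(fn)))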
Code Example #11
    def __init__(self,
                 map=None,
                 wav=None,
                 title='',
                 gui=False,
                 pick=False,
                 vgrid=False,
                 size=(18, 9)):
        self.rect_picked = None
        self.rect_color = (0.0, 0.6, 1.0, 1.0)  # '#0099FF'
        self.rect_selected_color = (0.75, 0.75, 0, 1.0)  # 'y'
        self.cluster_colors = [(0.0, 0.6, 1.0, 1.0), (0.0, 1.0, 0.6, 1.0),
                               (0.6, 0.0, 1.0, 1.0), (0.6, 1.0, 0.0, 1.0),
                               (1.0, 0.0, 0.6, 1.0), (1.0, 0.6, 0.0, 1.0)]

        plot.rcParams['keymap.fullscreen'] = 'ctrl+f'
        plot.rcParams['keymap.home'] = ''
        plot.rcParams['keymap.back'] = ''
        plot.rcParams['keymap.forward'] = ''
        plot.rcParams['keymap.pan'] = ''
        plot.rcParams['keymap.zoom'] = 'ctrl+z'
        plot.rcParams['keymap.quit'] = 'ctrl+q'
        plot.rcParams['keymap.grid'] = ''
        plot.rcParams['keymap.yscale'] = ''
        plot.rcParams['keymap.xscale'] = ''
        plot.rcParams['keymap.all_axes'] = ''
        plot.rcParams['toolbar'] = 'None'
        plot.rcParams['keymap.save'] = 'ctrl+s'
        # plot.rcParams.update({'font.family': 'courrier'})

        self.pick = pick
        self.gui = gui
        self.vgrid = vgrid
        self.fig = plot.figure(figsize=size,
                               facecolor='white',
                               tight_layout=True)
        self.plot = plot
        self.title = title

        self.ax = self.fig.add_subplot(1, 1, 1)
        cids = list()
        if self.gui:
            cids.append(
                self.fig.canvas.mpl_connect('key_press_event',
                                            self._on_keypress))
            cids.append(
                self.fig.canvas.mpl_connect('button_press_event',
                                            self._on_click))
            if pick:
                cids.append(
                    self.fig.canvas.mpl_connect('pick_event', self._on_pick))
        self.height = 5
        self.maxx = 0
        self.maxy = 0
        self.end_play = 0
        self.wav = wav
        self.audio = None

        if self.wav is not None:
            # Guard so WavInfoReader is not handed None when no file is given.
            print(self.wav)
            info = WavInfoReader(self.wav)
            print(info)

        if self.wav is not None and self.gui:
            self.audio = AudioPlayer(wav)
            self.timer = self.fig.canvas.new_timer(interval=10)
            self.timer.add_callback(self._update_timeline)
            self.timer.start()

        self.timeline = self.ax.plot([0, 0], [0, 0], color='r')[-1]
        self.map = map
        self.time_stamp = list()
        self.time_stamp_idx = 0