def do_set_caps(self, icaps, ocaps):
    """Configure the element for new input (audio) and output (video) caps.

    Sets up the S32 audio converter, the matplotlib figure/Agg canvas sized
    to the negotiated video frame, the ring buffer that holds the thinned
    sample window, and the per-frame timing/offset bookkeeping.

    Returns True to accept the caps.
    """
    # Parse negotiated audio input and video output formats.
    in_info = GstAudio.AudioInfo()
    in_info.from_caps(icaps)
    out_info = GstVideo.VideoInfo()
    out_info.from_caps(ocaps)

    # Converter target: same rate/channels/positions, but S32 samples.
    self.convert_info = GstAudio.AudioInfo()
    self.convert_info.set_format(GstAudio.AudioFormat.S32, in_info.rate, in_info.channels, in_info.position)
    self.converter = GstAudio.AudioConverter.new(GstAudio.AudioConverterFlags.NONE, in_info, self.convert_info, None)

    # Size the figure so that the rendered canvas matches the video
    # frame exactly (width/height in pixels divided by the figure DPI).
    self.fig = plt.figure()
    dpi = self.fig.get_dpi()
    self.fig.patch.set_alpha(0.3)
    self.fig.set_size_inches(out_info.width / float(dpi), out_info.height / float(dpi))

    # Full-bleed axes with no decorations; y-range spans the full S32
    # sample range (GLib.MININT..GLib.MAXINT).
    self.ax = plt.Axes(self.fig, [0., 0., 1., 1.])
    self.fig.add_axes(self.ax)
    self.ax.set_axis_off()
    self.ax.set_ylim((GLib.MININT, GLib.MAXINT))

    # Render off-screen through the Agg backend; self.h caches the line
    # artist (created lazily elsewhere — None until first draw).
    self.agg = self.fig.canvas.switch_backends(FigureCanvasAgg)
    self.h = None

    # Thin the sample window so roughly one retained sample maps to one
    # output pixel column.
    samplesperwindow = int(in_info.rate * in_info.channels * self.window_duration)
    self.thinning_factor = max(int(samplesperwindow / out_info.width - 1), 1)

    # Ring buffer holds the thinned window, pre-filled with silence.
    cap = int(samplesperwindow / self.thinning_factor)
    self.ax.set_xlim([0, cap])
    self.ringbuffer = RingBuffer(capacity=cap)
    self.ringbuffer.extend([0.0] * cap)

    # Duration of one output video frame in nanoseconds.
    self.frame_duration = Gst.util_uint64_scale_int(Gst.SECOND, out_info.fps_d, out_info.fps_n)
    self.next_time = self.frame_duration

    # Draw once so the empty background can be blitted for each frame.
    self.agg.draw()
    self.background = self.fig.canvas.copy_from_bbox(self.ax.bbox)

    # Number of audio samples consumed per output video frame, plus
    # running offsets used to pace frame production.
    self.samplesperbuffer = Gst.util_uint64_scale_int(in_info.rate * in_info.channels, out_info.fps_d, out_info.fps_n)
    self.next_offset = self.samplesperbuffer
    self.cur_offset = 0
    self.buf_offset = 0
    return True
def do_transform_ip(self, buffer: Gst.Buffer) -> Gst.FlowReturn:
    """In-place transform: accumulate audio buffers and trigger recognition.

    Each mapped buffer is appended to ``self._frames`` as an int16 array.
    ``self._do_speech_recognition()`` runs once the sequence reaches
    ``self._max_num_frames_seq`` buffers, or — when a positive
    ``self._silence_threshold`` is configured — after
    ``self._silence_duration`` consecutive buffers matching the silence
    test (once at least ``self._min_num_frames_seq`` buffers are queued).

    Args:
        buffer: the incoming Gst.Buffer (audio, S16LE per the comments below).

    Returns:
        Gst.FlowReturn.OK always, so the stream keeps flowing.
    """
    if not self._model:
        # Fixed typo: "speficied" -> "specified".
        Gst.warning(
            f"No model specified for {self}. Plugin working in passthrough mode"
        )
        return Gst.FlowReturn.OK

    # Map Gst.Buffer for READ access.
    is_ok, map_info = buffer.map(Gst.MapFlags.READ)
    if is_ok:
        # Parse audio info from the sink pad's current caps.
        # https://lazka.github.io/pgi-docs/GstAudio-1.0/classes/AudioInfo.html
        audio_info = GstAudio.AudioInfo()
        audio_info.from_caps(self.sinkpad.get_current_caps())

        # bpf = bytes per frame (for S16LE bpf = 2 bytes);
        # np.int16 matches the S16LE audio format.
        # https://lazka.github.io/pgi-docs/GstAudio-1.0/enums.html#GstAudio.AudioFormat.S16LE
        frame = np.ndarray(map_info.size // audio_info.bpf,
                           buffer=map_info.data, dtype=np.int16)
        # NOTE(review): `frame` may be a zero-copy view over the mapped
        # data; if map_info.data is a memoryview (newer PyGObject) it is
        # invalidated by unmap() below — consider frame.copy(). TODO:
        # confirm against the PyGObject version in use.
        self._frames.append(frame)
        buffer.unmap(map_info)

        cut_on_silence = self._silence_threshold > 0
        num_frames = len(self._frames)
        if num_frames >= self._max_num_frames_seq:
            # Hard cap on sequence length: recognize unconditionally.
            self._do_speech_recognition()
        elif cut_on_silence and num_frames >= self._min_num_frames_seq:
            # Normalized signal power of this buffer.
            # Bugfix: np.sqr does not exist in NumPy (it raised
            # AttributeError); np.square is the correct element-wise call.
            square = np.square(frame)
            squaresum = np.sum(square)
            normalizer = 1 << 30
            ncs = float(squaresum) / normalizer
            print(ncs)
            # NOTE(review): counting buffers with ncs ABOVE the threshold
            # as "silent" looks inverted (silence is usually LOW power);
            # kept as-is — confirm the intended semantics before flipping.
            if ncs > self._silence_threshold:
                self._silent_buffers_num += 1
                if self._silent_buffers_num >= self._silence_duration:
                    self._do_speech_recognition()
    return Gst.FlowReturn.OK
def __init__(self):
    """Set up the audio source with its default tone parameters.

    Chains up to GstBase.BaseSrc, marks the source live per
    DEFAULT_IS_LIVE, and operates in TIME format.
    """
    GstBase.BaseSrc.__init__(self)
    # Audio format negotiated later; start with an empty AudioInfo.
    self.info = GstAudio.AudioInfo()
    # Tone parameters, all taken from the module-level defaults.
    self.freq = DEFAULT_FREQ
    self.volume = DEFAULT_VOLUME
    self.mute = DEFAULT_MUTE
    # Base-source behavior: live source producing TIME-formatted buffers.
    self.set_live(DEFAULT_IS_LIVE)
    self.set_format(Gst.Format.TIME)
def __init__(self, buffer: Gst.Buffer, audio_info: GstAudio.AudioInfo = None, caps: Gst.Caps = None):
    """Wrap a Gst.Buffer together with its audio format description.

    The format comes either from an explicit ``audio_info`` or is parsed
    from ``caps``; exactly one of the two must be provided.

    Raises:
        RuntimeError: if neither ``audio_info`` nor ``caps`` is given.
    """
    self.__buffer = buffer
    self.__audio_info = None
    if audio_info:
        # Caller supplied a ready-made AudioInfo — use it directly.
        self.__audio_info = audio_info
    elif caps:
        # Derive the AudioInfo by parsing the provided caps.
        parsed = GstAudio.AudioInfo()
        parsed.from_caps(caps)
        self.__audio_info = parsed
    else:
        raise RuntimeError("One of audio_info or caps is required")
def convert_speakers_to_bitmask(speaker_list):
    """Converts a list of speaker names into a Gstreamer bitmask.

    Args:
        speaker_list: A list of speaker pair names,
            e.g. ``[lr, c, lfe, lrs, lrtm]``

    Returns:
        num_channels: Number of audio channels in provided configuration.
        bitmask: A Gstreamer bitmask for the specified configuration.
    """
    # Flatten every speaker entry into its channel positions; the total
    # position count is exactly the channel count.
    chs = []
    for speaker in speaker_list:
        chs.extend(AUDIO_CHANNEL_POSITIONS[speaker])
    num_channels = len(chs)
    _, bitmask = GstAudio.audio_channel_positions_to_mask(chs, False)
    return num_channels, bitmask
if __name__ == '__main__':
    # Build the application state before touching GStreamer.
    app = CustomData()

    # https://lazka.github.io/pgi-docs/#Gst-1.0/functions.html#Gst.init
    print('initializing Gst...')
    Gst.init(None)

    # Bail out early if the pipeline cannot be assembled.
    if not app.create_elements():
        print('failed to create elements nor a pipeline')
        sys.exit(1)
    app.setup_wavescope()

    # Describe the raw audio format: S16, SAMPLE_RATE Hz, mono.
    # https://lazka.github.io/pgi-docs/#GstAudio-1.0/classes/AudioInfo.html#GstAudio.AudioInfo.set_format
    audio_info = GstAudio.AudioInfo()
    audio_info.set_format(GstAudio.AudioFormat.S16, SAMPLE_RATE, 1, None)
    # https://lazka.github.io/pgi-docs/#GstAudio-1.0/classes/AudioInfo.html#GstAudio.AudioInfo.to_caps
    caps = audio_info.to_caps()

    # Configure both pipeline endpoints with the same caps, then wire
    # up the static and request pads.
    app.configure_appsrc(caps)
    app.configure_appsink(caps)
    app.link_always_pads()
    app.link_request_pads()

    # Start playback and hand control to the main loop.
    if not app.play_pipeline():
        sys.exit(1)
    app.run_main()
    print('disposing the data...')