def skip_frames_ffmpeg(filename, skip=0):
    """
    Speeds up a video by a factor of `skip + 1` using ffmpeg.

    The video timestamps are multiplied by `1 / (skip + 1)`; if the file has an audio track, its tempo is multiplied by `skip + 1` so that it stays aligned with the video. Nothing is done when `skip` is 0.

    Args:
        filename (str): Path to the video to process.
        skip (int, optional): Speed-up factor minus one (`skip=1` doubles the speed). Defaults to 0.

    Outputs:
        `filename`_skip.<file extension>
    """
    if skip == 0:
        return

    import os

    stem, extension = os.path.splitext(filename)
    speedup = skip + 1
    target = stem + '_skip' + extension

    video_chain = f'[0:v]setpts={1 / speedup}*PTS[v]'
    head = ['ffmpeg', '-y', '-i', filename, '-filter_complex']

    if has_audio(filename):
        # retime the audio together with the video and map both streams
        tail = [
            f'{video_chain};[0:a]atempo={speedup}[a]',
            '-map', '[v]', '-map', '[a]', '-q:v', '3', '-shortest', target
        ]
    else:
        tail = [video_chain, '-map', '[v]', '-q:v', '3', target]

    ffmpeg_cmd(head + tail,
               get_length(filename),
               pb_prefix='Skipping frames:')
Exemple #2
0
def mg_cropvideo_ffmpeg(filename,
                        crop_movement='Auto',
                        motion_box_thresh=0.1,
                        motion_box_margin=12):
    """
    Crops the video using ffmpeg.

    Args:
        filename (str): Path to the video file.
        crop_movement (str, optional): 'Auto' finds the bounding box that contains the total motion in the video. Motion threshold is given by motion_box_thresh. 'Manual' opens up a simple GUI that is used to crop the video manually by looking at the first frame. Defaults to 'Auto'.
        motion_box_thresh (float, optional): Only meaningful if `crop_movement='Auto'`. Takes floats between 0 and 1, where 0 includes all the motion and 1 includes none. Defaults to 0.1.
        motion_box_margin (int, optional): Only meaningful if `crop_movement='Auto'`. Adds margin to the bounding box. Defaults to 12.

    Returns:
        str: Path to the cropped video.
    """

    # The crop rectangle lives in module-level globals. In 'manual' mode this
    # function never assigns them itself; presumably run_cropping_window does
    # (TODO confirm against its definition).
    global w, h, x, y

    # NOTE(review): this progress bar is created but never used in this function.
    pb = MgProgressbar(total=get_length(filename),
                       prefix='Rendering cropped video:')

    if crop_movement.lower() == 'manual':

        scale_ratio = get_screen_video_ratio(filename)
        width, height = get_widthheight(filename)
        scaled_width, scaled_height = [
            int(elem * scale_ratio) for elem in [width, height]
        ]
        first_frame_as_image = get_first_frame_as_image(filename,
                                                        pict_format='.jpg')

        # Cropping UI runs in a separate thread to avoid cv2.waitKey crashing Python with segmentation fault on Linux in Terminal
        import threading
        # The thread must not be named `x`: `x` is the global crop coordinate,
        # so reusing the name would overwrite it with the Thread object and
        # make `.join()` depend on what the cropping window stores in `x`.
        cropping_thread = threading.Thread(
            target=run_cropping_window,
            args=(first_frame_as_image, scale_ratio, scaled_width,
                  scaled_height))
        cropping_thread.start()
        cropping_thread.join()

    elif crop_movement.lower() == 'auto':
        w, h, x, y = find_motion_box_ffmpeg(
            filename,
            motion_box_thresh=motion_box_thresh,
            motion_box_margin=motion_box_margin)

    cropped_video = crop_ffmpeg(filename, w, h, x, y)

    if crop_movement.lower() == 'manual':
        # close any leftover GUI windows and delete the temporary first-frame image
        cv2.destroyAllWindows()
        os.remove(first_frame_as_image)

    return cropped_video
Exemple #3
0
def contrast_brightness_ffmpeg(filename, contrast=0, brightness=0):
    """
    Applies contrast and brightness adjustments on the source video using ffmpeg.

    Does nothing (and returns None) when both `contrast` and `brightness` are 0.

    Args:
        filename (str): Path to the video to process.
        contrast (int or float, optional): Increase or decrease contrast. Values range from -100 to 100. Defaults to 0.
        brightness (int or float, optional): Increase or decrease brightness. Values range from -100 to 100. Defaults to 0.

    Outputs:
        `filename`_cb.<file extension>
    """
    if contrast == 0 and brightness == 0:
        return

    import os
    import numpy as np

    of, fex = os.path.splitext(filename)

    # keeping values in sensible range
    contrast = np.clip(contrast, -100.0, 100.0)
    brightness = np.clip(brightness, -100.0, 100.0)

    # ranges are "handpicked" so that the results are close to the results of contrast_brightness_cv2 (deprecated)
    if contrast == 0:
        # Neutral settings of ffmpeg's eq filter are saturation=1, contrast=1,
        # brightness=0. Using 0 for saturation/contrast here would turn a
        # brightness-only adjustment into a flat gray, desaturated video.
        p_saturation, p_contrast, p_brightness = 1, 1, 0
    elif contrast > 0:
        p_saturation = scale_num(contrast, 0, 100, 1, 1.9)
        p_contrast = scale_num(contrast, 0, 100, 1, 2.3)
        p_brightness = scale_num(contrast, 0, 100, 0, 0.04)
    elif contrast < 0:
        p_saturation = scale_num(contrast, 0, -100, 1, 0)
        p_contrast = scale_num(contrast, 0, -100, 1, 0)
        p_brightness = 0

    # the brightness argument is added on top of the contrast-derived offset
    if brightness != 0:
        p_brightness += brightness / 100

    outname = of + '_cb' + fex

    cmd = [
        'ffmpeg', '-y', '-i', filename, '-vf',
        f'eq=saturation={p_saturation}:contrast={p_contrast}:brightness={p_brightness}',
        '-q:v', '3', "-c:a", "copy", outname
    ]

    ffmpeg_cmd(cmd,
               get_length(filename),
               pb_prefix='Adjusting contrast and brightness:')
def contrast_brightness_ffmpeg(filename, contrast=0, brightness=0):
    """
    Applies contrast and brightness adjustments on the source video using ffmpeg.

    Does nothing (and returns None) when both `contrast` and `brightness` are 0.

    Args:
        filename (str): Path to the video to process.
        contrast (int or float, optional): Increase or decrease contrast. Clipped to the range -100 to 100. Defaults to 0.
        brightness (int or float, optional): Increase or decrease brightness. Clipped to the range -100 to 100. Defaults to 0.

    Outputs:
        `filename`_cb.<file extension>
    """
    if contrast == 0 and brightness == 0:
        return

    import os
    import numpy as np

    of, fex = os.path.splitext(filename)

    # keeping values in sensible range
    contrast = np.clip(contrast, -100.0, 100.0)
    brightness = np.clip(brightness, -100.0, 100.0)

    # ranges are "handpicked" so that the results are close to the results of mg_contrast_brightness
    if contrast == 0:
        # saturation=1 / contrast=1 / brightness=0 leave the picture unchanged
        # in ffmpeg's eq filter; zeros would gray out the video when only the
        # brightness is being adjusted.
        p_saturation, p_contrast, p_brightness = 1, 1, 0
    elif contrast > 0:
        p_saturation = scale_num(contrast, 0, 100, 1, 1.9)
        p_contrast = scale_num(contrast, 0, 100, 1, 2.3)
        p_brightness = scale_num(contrast, 0, 100, 0, 0.04)
    elif contrast < 0:
        p_saturation = scale_num(contrast, 0, -100, 1, 0)
        p_contrast = scale_num(contrast, 0, -100, 1, 0)
        p_brightness = 0

    # the brightness argument is added on top of the contrast-derived offset
    if brightness != 0:
        p_brightness += brightness / 100

    outname = of + '_cb' + fex

    cmd = [
        'ffmpeg', '-y', '-i', filename, '-vf',
        f'eq=saturation={p_saturation}:contrast={p_contrast}:brightness={p_brightness}',
        '-q:v', '3', "-c:a", "copy", outname
    ]

    ffmpeg_cmd(cmd,
               get_length(filename),
               pb_prefix='Adjusting contrast and brightness:')
Exemple #5
0
def skip_frames_ffmpeg(filename, skip=0):
    """
    Time-shrinks the video by a factor of `skip + 1` using ffmpeg. To double the speed of the video use `skip=1`. When `skip` is 0 the function returns without doing anything.

    Args:
        filename (str): Path to the video to process.
        skip (int, optional): Speed-up factor minus one. Defaults to 0.

    Outputs:
        `filename`_skip.<file extension>
    """
    if skip == 0:
        return

    import os

    base, ext = os.path.splitext(filename)
    factor = skip + 1
    destination = base + '_skip' + ext

    # video timestamps are compressed by 1/factor
    filtergraph = f'[0:v]setpts={1 / factor}*PTS[v]'
    stream_maps = ['-map', '[v]']
    trailing = ['-q:v', '3']

    if has_audio(filename):
        # audio tempo is raised by the same factor and mapped alongside the video
        filtergraph += f';[0:a]atempo={factor}[a]'
        stream_maps += ['-map', '[a]']
        trailing += ['-shortest']

    command = ['ffmpeg', '-y', '-i', filename, '-filter_complex', filtergraph]
    command += stream_maps + trailing + [destination]

    ffmpeg_cmd(command, get_length(filename), pb_prefix='Skipping frames:')
def videograms_ffmpeg(self):
    """
    Renders two videogram images of the source video with ffmpeg.

    For `_vgy.png` every frame is scaled to a 1-pixel-wide column and the columns are tiled
    side by side; for `_vgx.png` every frame is scaled to a 1-pixel-high row and the rows are
    stacked. The index of a row or column therefore corresponds to the index of the source frame.

    Outputs
    -------
    - `filename`_vgy.png

        Rendered first (progress label "Rendering horizontal videogram:").
    - `filename`_vgx.png

        Rendered second (progress label "Rendering vertical videogram:").

    Returns
    -------
    - MgList(MgImage, MgImage)

        The `_vgx.png` image followed by the `_vgy.png` image.
    """

    source = self.filename
    width, height = get_widthheight(source)
    framecount = get_framecount(source)
    length = get_length(source)

    # (output suffix, ffmpeg filter chain, progress bar label) for each image
    jobs = (
        ('_vgy.png',
         f'scale=1:{height}:sws_flags=area,normalize,tile={framecount}x1',
         "Rendering horizontal videogram:"),
        ('_vgx.png',
         f'scale={width}:1:sws_flags=area,normalize,tile=1x{framecount}',
         "Rendering vertical videogram:"),
    )

    for suffix, filter_chain, label in jobs:
        ffmpeg_cmd(['ffmpeg', '-y', '-i', source, '-frames', '1', '-vf',
                    filter_chain, self.of + suffix],
                   length, pb_prefix=label)

    return MgList([MgImage(self.of + '_vgx.png'),
                   MgImage(self.of + '_vgy.png')])
def mg_cropvideo_ffmpeg(
        filename,
        crop_movement='Auto',
        motion_box_thresh=0.1,
        motion_box_margin=12):
    """
    Crops the video using ffmpeg.

    Args:
        filename (str): Path to the video file.
        crop_movement (str, optional): 'Auto' takes the crop rectangle from `find_motion_box_ffmpeg`. 'Manual' shows the (screen-scaled) first frame in an OpenCV window where a rectangle is drawn with the mouse and confirmed with the "C" key. Case-insensitive. Defaults to 'Auto'.
        motion_box_thresh (float, optional): Passed to `find_motion_box_ffmpeg` in 'Auto' mode. Defaults to 0.1.
        motion_box_margin (int, optional): Passed to `find_motion_box_ffmpeg` in 'Auto' mode. Defaults to 12.

    Returns:
        The value returned by `crop_ffmpeg` for the chosen rectangle.
    """

    # State shared with the mouse callback (draw_rectangle); presumably the
    # callback updates the rectangle corners and the mask — verify there.
    global frame_mask, drawing, g_val, x_start, x_stop, y_start, y_stop
    x_start = y_start = -1
    x_stop = y_stop = -1
    drawing = False

    # NOTE(review): created but not used further in this function
    pb = MgProgressbar(total=get_length(filename),
                       prefix='Rendering cropped video:')

    mode = crop_movement.lower()
    manual = mode == 'manual'

    if manual:
        scale_ratio = get_screen_video_ratio(filename)
        width, height = get_widthheight(filename)
        scaled_width = int(width * scale_ratio)
        scaled_height = int(height * scale_ratio)

        first_frame_as_image = get_first_frame_as_image(
            filename, pict_format='.jpg')
        frame_scaled = cv2.resize(cv2.imread(first_frame_as_image),
                                  (scaled_width, scaled_height))

        frame_mask = np.zeros(frame_scaled.shape)
        window_title = 'Draw rectangle and press "C" to crop'
        cv2.namedWindow(window_title, cv2.WINDOW_AUTOSIZE)
        cv2.setMouseCallback(window_title, draw_rectangle, param=frame_scaled)
        g_val = 220

        # keep redrawing the frame with the mask overlaid until "c"/"C" is pressed
        while True:
            overlay = frame_scaled * (frame_mask != g_val) + \
                frame_mask.astype(np.uint8)
            cv2.imshow(window_title, overlay)
            pressed = cv2.waitKey(1) & 0xFF
            if pressed in (ord('c'), ord('C')):
                break
        cv2.destroyAllWindows()

        # normalise the corners so that start is the top-left one
        if x_stop < x_start:
            x_start, x_stop = x_stop, x_start
        if y_stop < y_start:
            y_start, y_stop = y_stop, y_start

        w, h = x_stop - x_start, y_stop - y_start
        x, y = x_start, y_start

        # map the rectangle back to source resolution if the preview was shrunk
        if scale_ratio < 1:
            w, h, x, y = [int(value / scale_ratio) for value in (w, h, x, y)]

    elif mode == 'auto':
        w, h, x, y = find_motion_box_ffmpeg(
            filename,
            motion_box_thresh=motion_box_thresh,
            motion_box_margin=motion_box_margin)

    cropped_video = crop_ffmpeg(filename, w, h, x, y)

    if manual:
        cv2.destroyAllWindows()
        os.remove(first_frame_as_image)

    return cropped_video
def find_motion_box_ffmpeg(filename, motion_box_thresh=0.1, motion_box_margin=12):
    """
    Finds the bounding box of the motion in a video by running ffmpeg's cropdetect filter on the thresholded frame difference and parsing the last crop rectangle it reports.

    Args:
        filename (str): Path to the video file.
        motion_box_thresh (float, optional): Gray level (0 to 1) used as the threshold colour for the frame difference. Defaults to 0.1.
        motion_box_margin (int, optional): Margin added to the detected box; passed through `roundup(..., 4)` first. Defaults to 12.

    Returns:
        tuple(int, int, int, int): Width, height, x and y of the motion box.

    Raises:
        ValueError: If ffmpeg's output contained no crop rectangle (for example because ffmpeg failed to process the file).
        KeyboardInterrupt: Re-raised after the ffmpeg process has been terminated.
    """
    import subprocess
    import matplotlib
    import numpy as np
    total_time = get_length(filename)
    width, height = get_widthheight(filename)
    crop_str = ''

    # ffmpeg wants the threshold colour as 0xRRGGBB
    thresh_color = matplotlib.colors.to_hex(
        [motion_box_thresh, motion_box_thresh, motion_box_thresh])
    thresh_color = '0x' + thresh_color[1:]

    pb = MgProgressbar(total=total_time, prefix='Finding area of motion:')

    command = ['ffmpeg', '-y', '-i', filename, '-f', 'lavfi', '-i', f'color={thresh_color},scale={width}:{height}', '-f', 'lavfi', '-i', f'color=black,scale={width}:{height}', '-f',
               'lavfi', '-i', f'color=white,scale={width}:{height}', '-lavfi', 'format=gray,tblend=all_mode=difference,threshold,cropdetect=round=2:limit=0:reset=0', '-f', 'null', '/dev/null']

    process = subprocess.Popen(
        command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)

    try:
        while True:
            out = process.stdout.readline()
            if out == '':
                # end of ffmpeg's output
                process.wait()
                break

            tokens = out.split()
            crop_tokens = [elem for elem in tokens if elem.startswith('crop=')]
            time_tokens = [elem for elem in tokens if elem.startswith('t:')]
            # only lines carrying both a crop rectangle and a timestamp are
            # cropdetect reports; anything else is skipped instead of crashing
            if crop_tokens and time_tokens:
                crop_str = crop_tokens[0]
                pb.progress(float(time_tokens[0][2:]))

        pb.progress(total_time)

        if not crop_str:
            raise ValueError(
                f'ffmpeg did not report any crop rectangle for {filename}.')

        # crop_str looks like 'crop=w:h:x:y'
        crop_width, crop_height, crop_x, crop_y = [
            int(elem) for elem in crop_str[5:].split(':')]

        motion_box_margin = roundup(motion_box_margin, 4)

        crop_width = np.clip(crop_width+motion_box_margin, 4, width)
        crop_height = np.clip(crop_height+motion_box_margin, 4, height)
        # NOTE(review): the lower bound of 4 for the offsets looks unusual
        # (0 would be the frame edge) — kept as in the original, confirm intent.
        crop_x = np.clip(crop_x-(motion_box_margin/2), 4, width)
        crop_y = np.clip(crop_y-(motion_box_margin/2), 4, height)

        # shift the box back inside the frame if the margin pushed it out
        if crop_x + crop_width > width:
            crop_x = width - crop_width
        else:
            crop_x = np.clip(crop_x, 0, width)
        if crop_y + crop_height > height:
            crop_y = height - crop_height
        else:
            crop_y = np.clip(crop_y, 0, height)

        crop_width, crop_height, crop_x, crop_y = [
            int(elem) for elem in [crop_width, crop_height, crop_x, crop_y]]

        return crop_width, crop_height, crop_x, crop_y

    except KeyboardInterrupt:
        try:
            process.terminate()
        except OSError:
            pass
        process.wait()
        # bare raise keeps the original traceback
        raise
    finally:
        # release the pipe whether parsing succeeded or not
        process.stdout.close()
def history_ffmpeg(self,
                   filename='',
                   history_length=10,
                   weights=1,
                   normalize=False,
                   norm_strength=1,
                   norm_smooth=0):
    """
    This function creates a video where each frame is a weighted mix of the
    current frame and the previous frames, using ffmpeg's tmix filter over
    `history_length` frames. Optionally the result is normalized.

    Parameters
    ----------
    - filename : str, optional

        Path to the input video file. If not specified the video file pointed to by the MgObject is used.
    - history_length : int, optional

        Default is 10. Number of frames to be saved in the history tail.

    - weights: int, float, str, list, optional

        Default is 1. Defines the weight or weights applied to the frames in the history tail. If given as list
        the first element in the list will correspond to the weight of the newest frame in the tail. If given as
        a str - like "3 1.2 1" - it will be automatically converted to a list - like [3, 1.2, 1]. Frames without
        an explicit weight get a weight of 1. A list passed in is not modified.

    - normalize: bool, optional

        Default is `False` (no normalization). If `True`, the history video will be normalized. This can be useful
        when processing motion (frame difference) videos.

    - norm_strength: int, float, optional

        Default is 1. Defines the strength of the normalization where 1 represents full strength.

    - norm_smooth: int, optional

        Default is 0 (no smoothing). Defines the number of previous frames to use for temporal smoothing. The input
        range of each channel is smoothed using a rolling average over the current frame and the `norm_smooth` previous frames.

    Outputs
    -------
    - `filename`_history.<file extension>

    Returns
    -------
    - MgObject
        A new MgObject pointing to the output '_history' video file.

    Raises
    ------
    - ParameterError
        If `weights`, `normalize`, `norm_strength` or `norm_smooth` have an unsupported type, or if more
        weights than `history_length` are given.
    """

    if filename == '':
        filename = self.filename

    of, fex = os.path.splitext(filename)

    def is_number(value):
        # bool is a subclass of int but is not accepted as a weight
        return isinstance(value, (int, float)) and not isinstance(value, bool)

    if is_number(weights):
        # a single number weighs the newest frame, all others get 1
        weights_map = np.ones(history_length)
        weights_map[-1] = weights
    elif isinstance(weights, (list, str)):
        if isinstance(weights, str):
            # "3 1.2 1" -> [3.0, 1.2, 1.0]
            try:
                newest_first = [float(token) for token in weights.split()]
            except ValueError as err:
                raise ParameterError(
                    'Found wrong type(s) in the list of weights. Use ints and floats.'
                ) from err
        else:
            if not all(is_number(item) for item in weights):
                raise ParameterError(
                    'Found wrong type(s) in the list of weights. Use ints and floats.'
                )
            newest_first = list(weights)

        if len(newest_first) > history_length:
            raise ParameterError(
                'history_length must be greater than or equal to the number of weights specified in weights.'
            )

        # tmix expects oldest frame first: pad with ones, then append the
        # given weights reversed (on a copy, so the caller's list is untouched)
        padding = list(np.ones(history_length - len(newest_first)))
        weights_map = padding + newest_first[::-1]
    else:
        raise ParameterError(
            'Wrong type used for weights. Use int, float, str, or list.')

    str_weights = ' '.join(str(weight) for weight in weights_map)

    if not isinstance(normalize, bool):
        raise ParameterError('Wrong type used for normalize. Use only bool.')

    if normalize:
        if type(norm_strength) not in [int, float]:
            raise ParameterError(
                'Wrong type used for norm_strength. Use int or float.')
        if type(norm_smooth) is not int:
            raise ParameterError(
                'Wrong type used for norm_smooth. Use only int.')

    outname = of + '_history' + fex

    filtergraph = f'tmix=frames={history_length}:weights={str_weights}'
    if normalize:
        filter_flag = '-filter_complex'
        filtergraph += f',normalize=independence=0:strength={norm_strength}'
        if norm_smooth != 0:
            filtergraph += f':smoothing={norm_smooth}'
    else:
        filter_flag = '-vf'

    cmd = [
        'ffmpeg', '-y', '-i', filename, filter_flag, filtergraph,
        '-q:v', '3', '-c:a', 'copy', outname
    ]

    ffmpeg_cmd(cmd, get_length(filename), pb_prefix='Rendering history video:')

    # Point the result at the file ffmpeg actually wrote; this differs from
    # self.of + '_history' + self.fex when an explicit filename was given.
    return musicalgestures.MgObject(outname,
                                    color=self.color,
                                    returned_by_process=True)
Exemple #10
0
def videograms_ffmpeg(self):
    """
    Renders two videogram images of the source video using ffmpeg. For one image every frame is scaled to a 1-pixel-wide column and the columns are tiled side by side; for the other every frame is scaled to a 1-pixel-high row and the rows are stacked. If the resulting images would be too large, the video is first shortened with `skip_frames_ffmpeg` and the videograms are rendered from the shortened file.

    Outputs:
        `self.filename`_vgx.png and `self.filename`_vgy.png, or
        `self.filename`_skip_vgx.png and `self.filename`_skip_vgy.png when frame skipping was necessary.

    Returns:
        MgList(MgImage, MgImage): An MgList with the MgImage objects referring to the `_vgx` and `_vgy` images respectively.
    """

    width, height = get_widthheight(self.filename)
    framecount = get_framecount(self.filename)

    def calc_skipfactor(width, height, framecount):
        """
        Helper function to calculate the frame-skipping needed to keep the videogram sizes below the 32-bit signed integer limit.

        Args:
            width (int): The width of the video.
            height (int): The height of the video.
            framecount (int): The number of frames in the video.

        Returns:
            tuple(int, int): The two skip factors computed from the height and from the width of the video, respectively.
        """

        intmax = 2147483647
        budget_x = intmax / (height + 128) - 1024
        budget_y = intmax / ((width * 8) + 1024) - 128
        return (int(math.ceil(framecount * 8 / budget_x)),
                int(math.ceil(framecount / budget_y)))

    testx, testy = calc_skipfactor(width, height, framecount)

    if testx <= 1 and testy <= 1:
        # the video is small enough to be rendered directly
        length = get_length(self.filename)
        vgy_path = self.of + '_vgy.png'
        vgx_path = self.of + '_vgx.png'

        ffmpeg_cmd(
            [
                'ffmpeg', '-y', '-i', self.filename, '-frames', '1', '-vf',
                f'scale=1:{height}:sws_flags=area,normalize,tile={framecount}x1',
                '-aspect', f'{framecount}:{height}', vgy_path
            ],
            length,
            stream=False,
            pb_prefix="Rendering horizontal videogram:")

        ffmpeg_cmd(
            [
                'ffmpeg', '-y', '-i', self.filename, '-frames', '1', '-vf',
                f'scale={width}:1:sws_flags=area,normalize,tile=1x{framecount}',
                '-aspect', f'{width}:{framecount}', vgx_path
            ],
            length,
            stream=False,
            pb_prefix="Rendering vertical videogram:")

        return MgList([MgImage(vgx_path), MgImage(vgy_path)])

    # too many frames: shorten the video by the larger of the two factors first
    necessary_skipfactor = max(testx, testy)
    print(
        f'{os.path.basename(self.filename)} is too large to process. Applying minimal skipping necessary...'
    )

    skip_frames_ffmpeg(self.filename, skip=necessary_skipfactor - 1)

    shortened_file = self.of + '_skip' + self.fex
    framecount = get_framecount(shortened_file)
    length = get_length(shortened_file)
    vgy_path = self.of + '_skip_vgy.png'
    vgx_path = self.of + '_skip_vgx.png'

    ffmpeg_cmd(
        [
            'ffmpeg', '-y', '-i', shortened_file, '-vf',
            f'scale=1:{height}:sws_flags=area,normalize,tile={framecount}x1',
            '-aspect', f'{framecount}:{height}', '-frames', '1', vgy_path
        ],
        length,
        stream=False,
        pb_prefix="Rendering horizontal videogram:")

    ffmpeg_cmd(
        [
            'ffmpeg', '-y', '-i', shortened_file, '-vf',
            f'scale={width}:1:sws_flags=area,normalize,tile=1x{framecount}',
            '-aspect', f'{width}:{framecount}', '-frames', '1', vgx_path
        ],
        length,
        stream=False,
        pb_prefix="Rendering vertical videogram:")

    return MgList([MgImage(vgx_path), MgImage(vgy_path)])
Exemple #11
0
def mg_audio_descriptors(filename=None,
                         window_size=4096,
                         overlap=8,
                         mel_filters=512,
                         power=2,
                         dpi=300,
                         autoshow=True):
    """
    Renders a figure of plots showing spectral/loudness descriptors, including RMS energy, spectral flatness,
    centroid, bandwidth, rolloff of the video/audio file.

    Parameters
    ----------
    - filename : str, optional

        Path to the audio/video file to be processed.

    - window_size : int, optional

        The size of the FFT frame. Default is 4096.

    - overlap : int, optional

        The window overlap. The hop size is window_size / overlap.
        Example: window_size=1024, overlap=4 -> hop=256

    - mel_filters : int, optional

        The number of filters to use for filtering the frequency domain. Affects the
        vertical resolution (sharpness) of the spectrogram. NB: Too high values with
        relatively small window sizes can result in artifacts (typically black lines)
        in the resulting image. Default is 512.

    - power : int, float, optional

        The steepness of the curve for the color mapping. Default is 2.

    - dpi : int, optional

        Image quality of the rendered figure. Default is 300 DPI.

    - autoshow: bool, optional

        Whether to show the resulting figure automatically. Default is `True` (figure is shown).

    Outputs
    -------

    - `filename` + '_descriptors.png'

    Returns
    -------
    - MgFigure

        An MgFigure object referring to the internal figure and its data.
        `None` is returned (after printing a message) when no filename is given
        or the file has no audio track.
    """

    if filename is None:
        print("No filename was given.")
        return

    if not has_audio(filename):
        print('The video has no audio track.')
        return

    of = os.path.splitext(filename)[0]

    hop_size = int(window_size / overlap)

    # sr=None keeps the file's native sampling rate
    y, sr = librosa.load(filename, sr=None)

    cent = librosa.feature.spectral_centroid(y=y,
                                             sr=sr,
                                             n_fft=window_size,
                                             hop_length=hop_size)
    spec_bw = librosa.feature.spectral_bandwidth(y=y,
                                                 sr=sr,
                                                 n_fft=window_size,
                                                 hop_length=hop_size)
    flatness = librosa.feature.spectral_flatness(y=y,
                                                 n_fft=window_size,
                                                 hop_length=hop_size)
    rolloff = librosa.feature.spectral_rolloff(y=y,
                                               sr=sr,
                                               n_fft=window_size,
                                               hop_length=hop_size,
                                               roll_percent=0.99)
    rolloff_min = librosa.feature.spectral_rolloff(y=y,
                                                   sr=sr,
                                                   n_fft=window_size,
                                                   hop_length=hop_size,
                                                   roll_percent=0.01)
    rms = librosa.feature.rms(y=y,
                              frame_length=window_size,
                              hop_length=hop_size)

    S = librosa.feature.melspectrogram(y=y,
                                       sr=sr,
                                       n_mels=mel_filters,
                                       fmax=sr / 2,
                                       n_fft=window_size,
                                       hop_length=hop_size,
                                       power=power)

    # three stacked plots sharing the time axis:
    # ax[0] RMS energy, ax[1] flatness, ax[2] mel spectrogram with overlays
    fig, ax = plt.subplots(figsize=(12, 8), dpi=dpi, nrows=3, sharex=True)

    librosa.display.specshow(librosa.power_to_db(S,
                                                 ref=np.max,
                                                 top_db=120),
                             sr=sr,
                             y_axis='mel',
                             fmax=sr / 2,
                             x_axis='time',
                             hop_length=hop_size,
                             ax=ax[2])

    # get rid of "default" ticks
    ax[2].yaxis.set_minor_locator(matplotlib.ticker.NullLocator())

    ax[0].set(title=os.path.basename(filename))
    length = get_length(filename)
    # 20 evenly spaced time ticks over the whole file
    plot_xticks = np.arange(0, length + 0.1, length / 20)
    ax[2].set(xticks=plot_xticks)

    # frequency ticks: 250 Hz, then 500 Hz growing by a factor of 1.5 up to sr / 2
    freq_ticks = [250]
    freq = 500
    while freq < sr / 2:
        freq_ticks.append(freq)
        freq *= 1.5

    # round to the nearest 10 Hz and label values above 1000 Hz in kHz
    freq_ticks = [round(elem, -1) for elem in freq_ticks]
    freq_ticks_labels = [
        str(round(elem / 1000, 1)) + 'k' if elem > 1000 else int(round(elem))
        for elem in freq_ticks
    ]

    ax[2].set(yticks=(freq_ticks))
    ax[2].set(yticklabels=(freq_ticks_labels))

    times = librosa.times_like(cent,
                               sr=sr,
                               n_fft=window_size,
                               hop_length=hop_size)

    ax[2].fill_between(times,
                       cent[0] - spec_bw[0],
                       cent[0] + spec_bw[0],
                       alpha=0.5,
                       label='Centroid +- bandwidth')
    ax[2].plot(times, cent.T, label='Centroid', color='y')
    ax[2].plot(times, rolloff[0], label='Roll-off frequency (0.99)')
    ax[2].plot(times,
               rolloff_min[0],
               color='r',
               label='Roll-off frequency (0.01)')

    ax[2].legend(loc='upper right')

    ax[1].plot(times, flatness.T, label='Flatness', color='y')
    ax[1].legend(loc='upper right')

    ax[0].semilogy(times, rms[0], label='RMS Energy')
    ax[0].legend(loc='upper right')

    plt.tight_layout()
    plt.savefig('%s_descriptors.png' % of, format='png')

    if not autoshow:
        plt.close()

    # create MgFigure
    data = {
        "hop_size": hop_size,
        "sr": sr,
        "of": of,
        "times": times,
        "S": S,
        "length": length,
        "cent": cent,
        "spec_bw": spec_bw,
        "rolloff": rolloff,
        "rolloff_min": rolloff_min,
        "flatness": flatness,
        "rms": rms
    }

    mgf = MgFigure(figure=fig,
                   figure_type='audio.descriptors',
                   data=data,
                   layers=None,
                   image=of + '_descriptors.png')

    return mgf
Exemple #12
0
def mg_audio_spectrogram(filename=None,
                         window_size=4096,
                         overlap=8,
                         mel_filters=512,
                         power=2,
                         dpi=300,
                         autoshow=True):
    """
    Renders a figure showing the mel-scaled spectrogram of the video/audio file.

    Parameters
    ----------
    - filename : str, optional

        Path to the audio/video file to be processed.

    - window_size : int, optional

        The size of the FFT frame. Default is 4096.

    - overlap : int, optional

        The window overlap. The hop size is window_size / overlap.
        Example: window_size=1024, overlap=4 -> hop=256

    - mel_filters : int, optional

        The number of filters to use for filtering the frequency domain. Affects the
        vertical resolution (sharpness) of the spectrogram. NB: Too high values with
        relatively small window sizes can result in artifacts (typically black lines)
        in the resulting image. Default is 512.

    - power : int, float, optional

        The steepness of the curve for the color mapping. Default is 2.

    - dpi : int, optional

        Image quality of the rendered figure. Default is 300 DPI.

    - autoshow: bool, optional

        Whether to show the resulting figure automatically. Default is `True` (figure is shown).

    Outputs
    -------

    - `filename` (without its extension) + '_spectrogram.png'

    Returns
    -------
    - MgFigure

        An MgFigure object referring to the internal figure and its data.
        `None` is returned (after printing a message) when no filename is
        given or when the file has no audio track.
    """
    if filename is None:
        print("No filename was given.")
        return

    if not has_audio(filename):
        print('The video has no audio track.')
        return

    of = os.path.splitext(filename)[0]

    hop_size = int(window_size / overlap)

    y, sr = librosa.load(filename, sr=None)
    nyquist = sr / 2

    S = librosa.feature.melspectrogram(y=y,
                                       sr=sr,
                                       n_mels=mel_filters,
                                       fmax=nyquist,
                                       n_fft=window_size,
                                       hop_length=hop_size,
                                       power=power)

    fig, ax = plt.subplots(figsize=(12, 6), dpi=dpi)

    img = librosa.display.specshow(librosa.power_to_db(S,
                                                       ref=np.max,
                                                       top_db=120),
                                   sr=sr,
                                   y_axis='mel',
                                   fmax=nyquist,
                                   x_axis='time',
                                   hop_length=hop_size,
                                   ax=ax)

    colorbar_ticks = range(-120, 1, 10)
    fig.colorbar(img, format='%+2.0f dB', ticks=colorbar_ticks)

    # get rid of "default" ticks
    ax.yaxis.set_minor_locator(matplotlib.ticker.NullLocator())

    ax.set(title=os.path.basename(filename))

    # 21 evenly spaced time ticks from 0 to the end of the file. linspace
    # never overshoots the end (an arange with a fixed +0.1 slack does for
    # clips shorter than 2 s) and a zero-length file is simply left with
    # the default ticks instead of raising on a zero step.
    length = get_length(filename)
    if length > 0:
        ax.set(xticks=np.linspace(0, length, 21))

    # Geometric ladder of frequency ticks (x1.3 per step), rounded to the
    # nearest 100 Hz. Rounding maps several steps onto the same value and can
    # push a step onto/above Nyquist, so duplicates and out-of-range values
    # are dropped before the Nyquist tick itself is appended.
    freq_ticks = []
    freq = 100
    while freq < nyquist:
        tick = round(freq, -2)
        if tick < nyquist and tick not in freq_ticks:
            freq_ticks.append(tick)
        freq *= 1.3
    freq_ticks.append(nyquist)

    # Labels are always strings: "1.4k" style above 1 kHz, plain Hz otherwise.
    freq_ticks_labels = [
        str(round(tick / 1000, 1)) + 'k' if tick > 1000 else str(int(round(tick)))
        for tick in freq_ticks
    ]

    ax.set(yticks=freq_ticks)
    ax.set(yticklabels=freq_ticks_labels)

    # Operate on this figure explicitly rather than on pyplot's notion of the
    # "current" figure.
    fig.tight_layout()
    fig.savefig('%s_spectrogram.png' % of, format='png')

    if not autoshow:
        plt.close(fig)

    # create MgFigure
    data = {"hop_size": hop_size, "sr": sr, "of": of, "S": S, "length": length}

    mgf = MgFigure(figure=fig,
                   figure_type='audio.spectrogram',
                   data=data,
                   layers=None,
                   image=of + '_spectrogram.png')

    return mgf
Exemple #13
0
def find_motion_box_ffmpeg(filename,
                           motion_box_thresh=0.1,
                           motion_box_margin=12):
    """
    Helper function to find the area of motion in a video, using ffmpeg.

    Runs ffmpeg with a frame-difference + threshold + cropdetect filter chain,
    follows the cropdetect log lines while the process runs, and turns the last
    reported crop rectangle (plus margin) into a box that fits inside the frame.

    Args:
        filename (str): Path to the video file.
        motion_box_thresh (float, optional): Pixel threshold to apply to the video before assessing the area of motion. Defaults to 0.1.
        motion_box_margin (int, optional): Margin (in pixels) to add to the detected motion box. Defaults to 12.

    Raises:
        KeyboardInterrupt: In case we stop the process manually.
        ValueError: If ffmpeg did not report any crop rectangle.

    Returns:
        int: The width of the motion box.
        int: The height of the motion box.
        int: The X coordinate of the top left corner of the motion box.
        int: The Y coordinate of the top left corner of the motion box.
    """

    import subprocess
    import matplotlib
    import numpy as np

    total_time = get_length(filename)
    width, height = get_widthheight(filename)
    crop_str = ''

    # ffmpeg expects the gray threshold colour as 0xRRGGBB.
    thresh_color = matplotlib.colors.to_hex(
        [motion_box_thresh, motion_box_thresh, motion_box_thresh])
    thresh_color = '0x' + thresh_color[1:]

    pb = MgProgressbar(total=total_time, prefix='Finding area of motion:')

    command = [
        'ffmpeg', '-y', '-i', filename, '-f', 'lavfi', '-i',
        f'color={thresh_color},scale={width}:{height}', '-f', 'lavfi', '-i',
        f'color=black,scale={width}:{height}', '-f', 'lavfi', '-i',
        f'color=white,scale={width}:{height}', '-lavfi',
        'format=gray,tblend=all_mode=difference,threshold,cropdetect=round=2:limit=0:reset=0',
        '-f', 'null', '/dev/null'
    ]

    process = subprocess.Popen(command,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT,
                               universal_newlines=True)

    try:
        # readline() returns '' only at EOF, i.e. when ffmpeg has finished.
        for line in iter(process.stdout.readline, ''):
            crop_token = None
            time_token = None
            for token in line.split():
                if token.startswith('crop='):
                    crop_token = token
                elif token.startswith('t:'):
                    time_token = token

            # Only lines carrying both a timestamp and a crop rectangle are
            # cropdetect reports; anything else is ignored instead of being
            # indexed blindly.
            if crop_token is None or time_token is None:
                continue
            try:
                time_float = float(time_token[2:])
            except ValueError:
                continue

            crop_str = crop_token
            pb.progress(time_float)

        process.wait()

    except KeyboardInterrupt:
        try:
            process.terminate()
        except OSError:
            pass
        process.wait()
        # Bare raise keeps the original traceback.
        raise

    finally:
        process.stdout.close()

    pb.progress(total_time)

    if not crop_str:
        raise ValueError(
            'ffmpeg did not report a crop rectangle for "%s".' % filename)

    crop_width, crop_height, crop_x, crop_y = [
        int(elem) for elem in crop_str[len('crop='):].split(':')
    ]

    motion_box_margin = roundup(motion_box_margin, 4)

    # Grow the box by the margin, keeping it at least 4 px and at most the
    # frame size.
    crop_width = np.clip(crop_width + motion_box_margin, 4, width)
    crop_height = np.clip(crop_height + motion_box_margin, 4, height)

    # Shift the origin by half the margin so the margin is split between both
    # sides, then keep the whole box inside the frame: the origin may not go
    # below 0 and may not push the far edge past the frame border.
    crop_x = np.clip(crop_x - (motion_box_margin / 2), 0, width - crop_width)
    crop_y = np.clip(crop_y - (motion_box_margin / 2), 0,
                     height - crop_height)

    crop_width, crop_height, crop_x, crop_y = [
        int(elem) for elem in [crop_width, crop_height, crop_x, crop_y]
    ]

    return crop_width, crop_height, crop_x, crop_y
Exemple #14
0
    def spectrogram(self,
                    window_size=4096,
                    overlap=8,
                    mel_filters=512,
                    power=2,
                    dpi=300,
                    autoshow=True,
                    title=None):
        """
        Renders a figure showing the mel-scaled spectrogram of the video/audio file.

        Args:
            window_size (int, optional): The size of the FFT frame. Defaults to 4096.
            overlap (int, optional): The window overlap. The hop size is window_size / overlap. Example: window_size=1024, overlap=4 -> hop=256. Defaults to 8.
            mel_filters (int, optional): The number of filters to use for filtering the frequency domain. Affects the vertical resolution (sharpness) of the spectrogram. NB: Too high values with relatively small window sizes can result in artifacts (typically black lines) in the resulting image. Defaults to 512.
            power (float, optional): The steepness of the curve for the color mapping. Defaults to 2.
            dpi (int, optional): Image quality of the rendered figure in DPI. Defaults to 300.
            autoshow (bool, optional): Whether to show the resulting figure automatically. Defaults to True.
            title (str, optional): Optionally add title to the figure. Defaults to None, which uses the file name as a title.

        Outputs:
            `self.of` + '_spectrogram.png'

        Returns:
            MgFigure: An MgFigure object referring to the internal figure and its data. None is returned (after printing a message) when the file has no audio track.
        """

        if not has_audio(self.filename):
            print('The video has no audio track.')
            return

        hop_size = int(window_size / overlap)

        y, sr = librosa.load(self.filename, sr=None)
        nyquist = sr / 2

        S = librosa.feature.melspectrogram(y=y,
                                           sr=sr,
                                           n_mels=mel_filters,
                                           fmax=nyquist,
                                           n_fft=window_size,
                                           hop_length=hop_size,
                                           power=power)

        # honour the requested resolution
        fig, ax = plt.subplots(figsize=(12, 6), dpi=dpi)

        # make sure background is white
        fig.patch.set_facecolor('white')
        fig.patch.set_alpha(1)

        # add title
        if title is None:
            title = os.path.basename(self.filename)
        fig.suptitle(title, fontsize=16)

        img = librosa.display.specshow(librosa.power_to_db(S,
                                                           ref=np.max,
                                                           top_db=120),
                                       sr=sr,
                                       y_axis='mel',
                                       fmax=nyquist,
                                       x_axis='time',
                                       hop_length=hop_size,
                                       ax=ax)

        colorbar_ticks = range(-120, 1, 10)
        fig.colorbar(img, format='%+2.0f dB', ticks=colorbar_ticks)

        # get rid of "default" ticks
        ax.yaxis.set_minor_locator(matplotlib.ticker.NullLocator())

        # 21 evenly spaced time ticks from 0 to the end of the file. linspace
        # never overshoots the end (an arange with a fixed +0.1 slack does
        # for clips shorter than 2 s) and a zero-length file keeps the
        # default ticks instead of raising on a zero step.
        length = get_length(self.filename)
        if length > 0:
            ax.set(xticks=np.linspace(0, length, 21))

        # Geometric ladder of frequency ticks (x1.3 per step), rounded to the
        # nearest 100 Hz. Rounding maps several steps onto the same value and
        # can push a step onto/above Nyquist, so duplicates and out-of-range
        # values are dropped before the Nyquist tick itself is appended.
        freq_ticks = []
        freq = 100
        while freq < nyquist:
            tick = round(freq, -2)
            if tick < nyquist and tick not in freq_ticks:
                freq_ticks.append(tick)
            freq *= 1.3
        freq_ticks.append(nyquist)

        # Labels are always strings: "1.4k" style above 1 kHz, plain Hz
        # otherwise.
        freq_ticks_labels = [
            str(round(tick / 1000, 1)) + 'k' if tick > 1000
            else str(int(round(tick)))
            for tick in freq_ticks
        ]

        ax.set(yticks=freq_ticks)
        ax.set(yticklabels=freq_ticks_labels)

        # Operate on this figure explicitly rather than on pyplot's notion of
        # the "current" figure.
        fig.tight_layout()

        fig.savefig('%s_spectrogram.png' % self.of,
                    format='png',
                    transparent=False)

        if not autoshow:
            plt.close(fig)

        # create MgFigure
        data = {
            "hop_size": hop_size,
            "sr": sr,
            "of": self.of,
            "S": S,
            "length": length
        }

        mgf = MgFigure(figure=fig,
                       figure_type='audio.spectrogram',
                       data=data,
                       layers=None,
                       image=self.of + '_spectrogram.png')

        return mgf