Example 1
import numpy as np

from autosubsync import find_transform, quality_of_fit


def test_quality_of_fit_mismatch(result_meta, bias=0):
    # Pair every file with every *other* file so the labels and the
    # predicted scores deliberately do not match.
    all_files = np.unique(result_meta.file_number)
    pairs = [(n1, n2) for n1 in all_files for n2 in all_files if n2 != n1]

    print('---- quality of fit (computing for %d mismatches) ----' %
          len(pairs))

    qualities = []

    for fn1, fn2 in pairs:
        # use plain numpy arrays to avoid pandas index alignment on assignment
        labels0 = result_meta.label[result_meta.file_number == fn1].values
        probs0 = result_meta.predicted_score[result_meta.file_number == fn2].values
        n = min(len(labels0), len(probs0))

        # pad/truncate the mismatched labels to the length of the scores
        labels = np.zeros_like(probs0)
        labels[:n] = labels0[:n]

        # try the scores both forwards and reversed
        for flip in [False, True]:
            probs = probs0[::-1] if flip else probs0

            skew, shift, quality = find_transform.find_transform_parameters(
                labels, probs, bias=bias)

            # a mismatched pair should fail the quality-of-fit test
            if quality >= quality_of_fit.threshold:
                print('false positive: quality %g above threshold' % quality)

            qualities.append(quality)
            print(quality)

    return np.array(qualities)
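For context, `result_meta` is assumed here to be a pandas DataFrame with per-frame columns `file_number`, `label`, and `predicted_score`; this layout is inferred from how the test indexes it and is not part of autosubsync's documented API. A minimal sketch driving the test with synthetic data:

import numpy as np
import pandas as pd

# Synthetic stand-in for result_meta; column names are inferred from
# how test_quality_of_fit_mismatch indexes the frame.
rng = np.random.default_rng(0)
frames = []
for file_number in [1, 2, 3]:
    n = 500
    labels = rng.integers(0, 2, size=n)  # hypothetical per-frame speech labels
    # hypothetical predicted speech probabilities, loosely tracking the labels
    scores = np.clip(labels * 0.6 + rng.normal(0.2, 0.15, size=n), 0.0, 1.0)
    frames.append(pd.DataFrame({
        'file_number': file_number,
        'label': labels,
        'predicted_score': scores,
    }))

result_meta = pd.concat(frames, ignore_index=True)
qualities = test_quality_of_fit_mismatch(result_meta)
# all mismatched pairs should score below quality_of_fit.threshold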
Example 2
import numpy as np
import pandas as pd

from autosubsync import find_transform


def test_correct_sync(result_meta, bias=0):
    print('---- synchronization accuracy ----')

    results = []
    for file_number in np.unique(result_meta.file_number):
        # Fit each file's labels against its own predicted scores. The data
        # is already in sync, so the correct transform is the identity:
        # skew 1.0 and shift 0. Any fitted shift is therefore the error.
        part = result_meta[result_meta.file_number == file_number]
        skew, shift, quality = find_transform.find_transform_parameters(
            part.label, part.predicted_score, bias=bias)
        skew_error = skew != 1.0
        results.append([skew_error, shift, quality])

    sync_results = pd.DataFrame(
        np.array(results), columns=['skew_error', 'shift_error', 'quality'])
    print(sync_results)

    print('skew errors:', sync_results.skew_error.sum())
    print('shift RMSE:', np.sqrt(np.mean(sync_results.shift_error**2)))

    return sync_results
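A possible acceptance check on the returned frame; the 0.1 s shift tolerance below is an illustrative assumption, not a value from autosubsync:

import numpy as np

# Illustrative acceptance checks; the 0.1 s tolerance is an assumption.
sync_results = test_correct_sync(result_meta)

assert sync_results.skew_error.sum() == 0, 'some files fit with a wrong skew'

shift_rmse = float(np.sqrt(np.mean(sync_results.shift_error ** 2)))
assert shift_rmse < 0.1, 'shift RMSE %.3f s exceeds tolerance' % shift_rmse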
Example 3
def synchronize(video_file, subtitle_file, output_file, verbose=False,
                parallelism=3, fixed_skew=None, model_file=None,
                return_parameters=False, **kwargs):
    """
    Automatically synchronize subtitles with audio in a video file.
    Uses the command line tool "ffmpeg", which must be available, to extract
    the audio from the video file. Uses temporary files, which are deleted
    automatically.

    Args:
        video_file (string): Input video file name
        subtitle_file (string): Input SRT subtitle file name
        output_file (string): Output (synchronized) SRT subtitle file name
        verbose (boolean): If True, print progress information to stdout
        return_parameters (boolean): If True, returns the synchronization
            parameters instead of just the success flag
        other arguments: Search parameters, see ``autosubsync --help``

    Returns:
        If return_parameters is False (default), returns
        True on success (quality of fit test passed), False if failed.

        If return_parameters is True, returns a tuple of four values

            success (boolean)   success flag as above
            quality (float)     metric used to determine the value of "success"
            skew (float)        best fit skew/speed (unitless)
            shift (float)       best fit shift in seconds

    """

    # these are here to enable running as python3 autosubsync/main.py
    from autosubsync import features
    from autosubsync import find_transform
    from autosubsync import model
    from autosubsync import preprocessing
    from autosubsync import quality_of_fit

    # argument parsing
    if model_file is None:
        from pkg_resources import resource_filename
        model_file = resource_filename(__name__, '../trained-model.bin')

    fixed_skew = parse_skew(fixed_skew)

    # load model
    trained_model = model.load(model_file)

    if verbose: print('Extracting audio using ffmpeg and reading subtitles...')
    sound_data, subvec = preprocessing.import_target_files(
        video_file, subtitle_file)

    if verbose:
        print(('computing features for %d audio samples '
               'using %d parallel process(es)') % (len(subvec), parallelism))

    features_x, shifted_y = features.compute(sound_data,
                                             subvec,
                                             parallelism=parallelism)

    if verbose:
        print('extracted features of size %s, performing speech detection' %
              str(features_x.shape))

    y_scores = model.predict(trained_model, features_x)

    # save some memory before parallelization fork so we look less bad
    del features_x, sound_data, subvec
    gc.collect()

    if verbose:
        print('computing best fit with %d frames' % len(y_scores))

    skew, shift, quality = find_transform.find_transform_parameters(
        shifted_y, y_scores,
        parallelism=parallelism, fixed_skew=fixed_skew, bias=trained_model[1],
        verbose=verbose, **kwargs)

    success = quality > quality_of_fit.threshold
    if verbose:
        print('quality of fit: %g, threshold %g' %
              (quality, quality_of_fit.threshold))
        print('Fit complete. Performing resync, writing to ' + output_file)

    transform_func = find_transform.parameters_to_transform(skew, shift)
    preprocessing.transform_srt(subtitle_file, output_file, transform_func)

    if verbose and success: print('success!')

    if return_parameters:
        return success, quality, skew, shift
    else:
        return success
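A usage sketch for the `synchronize` entry point; the file names are placeholders, and the import path follows the snippet's own module (autosubsync/main.py), as noted in its comments:

# Usage sketch; file names are placeholders.
from autosubsync.main import synchronize

success, quality, skew, shift = synchronize(
    'movie.mp4', 'subtitles.srt', 'subtitles-synced.srt',
    verbose=True, return_parameters=True)

if success:
    print('synced with skew %g and shift %g s (quality %g)' %
          (skew, shift, quality))
else:
    print('quality of fit below threshold: synchronization may have failed')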