Ejemplo n.º 1
0
def try_sync(args: argparse.Namespace, reference_pipe: Optional[Pipeline], result: Dict[str, Any]) -> bool:
    """Align every input subtitle stream with the reference and emit the result.

    For each entry of ``args.srtin`` a set of candidate subtitle pipelines is
    built (one per framerate ratio to try), the best-scoring alignment is
    selected via ``MaxScoreAligner`` unless syncing is skipped, and the shifted
    (optionally merged) subtitles are written out unless output is suppressed.

    Args:
        args: Parsed options. Read here: ``srtin``, ``srtout``,
            ``overwrite_input``, ``skip_sync``, ``skip_infer_framerate_ratio``,
            ``max_offset_seconds``, ``reference``, ``apply_offset_seconds``,
            ``merge_with_reference``, ``output_encoding`` and
            ``suppress_output_if_offset_less_than``. ``args.srtin`` is replaced
            by ``[None]`` when it is empty (the log message labels that case
            ``'stdin'``).
        reference_pipe: Pipeline for the reference. When ``None``, both the
            alignment and the framerate-ratio inference are skipped.
        result: Dict updated in place. On success it receives
            ``'offset_seconds'`` and ``'framerate_scale_factor'`` (values of
            the last processed input); ``'sync_was_successful'`` is written
            whenever the function returns.

    Returns:
        ``True`` when all inputs were processed, ``False`` when a
        ``FailedToFindAlignmentException`` was raised.

    Raises:
        Exception: Any other error is logged and re-raised unchanged; in that
            case ``result['sync_was_successful']`` is not written.
    """
    sync_was_successful = True
    try:
        logger.info('extracting speech segments from %s...',
                    'stdin' if not args.srtin else 'subtitles file(s) {}'.format(args.srtin))
        if not args.srtin:
            # A single ``None`` entry makes the loop below run exactly once
            # when no subtitle paths were supplied.
            args.srtin = [None]
        for srtin in args.srtin:
            # Without a reference pipeline there is nothing to align against.
            skip_sync = args.skip_sync or reference_pipe is None
            skip_infer_framerate_ratio = args.skip_infer_framerate_ratio or reference_pipe is None
            srtout = srtin if args.overwrite_input else args.srtout
            srt_pipe_maker = get_srt_pipe_maker(args, srtin)
            framerate_ratios = get_framerate_ratios_to_try(args)
            srt_pipes = [srt_pipe_maker(1.)] + [srt_pipe_maker(rat) for rat in framerate_ratios]
            for srt_pipe in srt_pipes:
                # Callable entries are left unfitted at this point.
                if not callable(srt_pipe):
                    srt_pipe.fit(srtin)
            if not skip_infer_framerate_ratio and hasattr(reference_pipe[-1], 'num_frames'):
                # Add one more candidate whose ratio comes from the relative
                # lengths (in frames) of the reference and the unscaled subtitles.
                inferred_framerate_ratio_from_length = float(reference_pipe[-1].num_frames) / cast(Pipeline, srt_pipes[0])[-1].num_frames
                logger.info('inferred frameratio ratio: %.3f', inferred_framerate_ratio_from_length)
                srt_pipes.append(cast(Pipeline, srt_pipe_maker(inferred_framerate_ratio_from_length)).fit(srtin))
                logger.info('...done')
            logger.info('computing alignments...')
            if skip_sync:
                # No alignment: keep the unscaled pipeline with a zero offset.
                best_score = 0.
                best_srt_pipe = cast(Pipeline, srt_pipes[0])
                offset_samples = 0
            else:
                (best_score, offset_samples), best_srt_pipe = MaxScoreAligner(
                    FFTAligner, srtin, SAMPLE_RATE, args.max_offset_seconds
                ).fit_transform(
                    reference_pipe.transform(args.reference),
                    srt_pipes,
                )
            logger.info('...done')
            # Convert the sample offset to seconds and add the user-requested shift.
            offset_seconds = offset_samples / float(SAMPLE_RATE) + args.apply_offset_seconds
            scale_step = best_srt_pipe.named_steps['scale']
            logger.info('score: %.3f', best_score)
            logger.info('offset seconds: %.3f', offset_seconds)
            logger.info('framerate scale factor: %.3f', scale_step.scale_factor)
            output_steps: List[Tuple[str, TransformerMixin]] = [('shift', SubtitleShifter(offset_seconds))]
            if args.merge_with_reference:
                output_steps.append(
                    ('merge', SubtitleMerger(reference_pipe.named_steps['parse'].subs_))
                )
            output_pipe = Pipeline(output_steps)
            out_subs = output_pipe.fit_transform(scale_step.subs_)
            if args.output_encoding != 'same':
                out_subs = out_subs.set_encoding(args.output_encoding)
            suppress_output_thresh = args.suppress_output_if_offset_less_than
            # NOTE(review): with a threshold set, output is written only when
            # the scale factor is exactly 1.0 AND the offset reaches the
            # threshold; the warning below mentions only the offset. Confirm
            # that suppressing rescaled output is intended.
            if (
                suppress_output_thresh is None
                or (
                    scale_step.scale_factor == 1.0
                    and offset_seconds >= suppress_output_thresh
                )
            ):
                logger.info('writing output to {}'.format(srtout or 'stdout'))
                out_subs.write_file(srtout)
            else:
                logger.warning('suppressing output because offset %s was less than suppression threshold %s',
                               offset_seconds, args.suppress_output_if_offset_less_than)
    except FailedToFindAlignmentException as e:
        sync_was_successful = False
        logger.error(e)
    except Exception as e:
        logger.error(e)
        # Bare ``raise`` keeps the original traceback intact.
        raise
    else:
        # Values from the last loop iteration.
        result['offset_seconds'] = offset_seconds
        result['framerate_scale_factor'] = scale_step.scale_factor
    # Deliberately outside any ``finally``: returning from ``finally`` would
    # silently discard in-flight exceptions such as KeyboardInterrupt.
    result['sync_was_successful'] = sync_was_successful
    return sync_was_successful
Ejemplo n.º 2
0
def test_fft_alignment(s1, s2, true_offset):
    """Check that all three aligner setups recover ``true_offset``.

    Covers ``FFTAligner`` used directly, and ``MaxScoreAligner`` wrapping
    either the ``FFTAligner`` class or an ``FFTAligner`` instance (for the
    wrapped cases the first element of the result is compared).
    """
    # Plain FFT aligner: the result itself is the offset.
    direct_result = FFTAligner().fit_transform(s2, s1)
    assert direct_result == true_offset

    # MaxScoreAligner given the aligner class.
    class_based_result = MaxScoreAligner(FFTAligner).fit_transform(s2, s1)
    assert class_based_result[0] == true_offset

    # MaxScoreAligner given an already-constructed aligner instance.
    instance_based_result = MaxScoreAligner(FFTAligner()).fit_transform(s2, s1)
    assert instance_based_result[0] == true_offset
Ejemplo n.º 3
0
def try_sync(args: argparse.Namespace, reference_pipe: Optional[Pipeline],
             result: Dict[str, Any]) -> bool:
    """Align every input subtitle stream with the reference and emit the result.

    For each entry of ``args.srtin`` a set of candidate subtitle pipelines is
    built (one per framerate ratio to try), the best-scoring alignment is
    selected via ``MaxScoreAligner`` unless syncing is skipped, and the shifted
    (optionally merged) subtitles are written out unless output is suppressed.

    Args:
        args: Parsed options. Read here: ``srtin``, ``srtout``,
            ``overwrite_input``, ``skip_sync``, ``skip_infer_framerate_ratio``,
            ``max_offset_seconds``, ``reference``, ``apply_offset_seconds``,
            ``merge_with_reference``, ``output_encoding`` and
            ``suppress_output_if_offset_less_than``. ``args.srtin`` is replaced
            by ``[None]`` when it is empty (the log message labels that case
            ``"stdin"``).
        reference_pipe: Pipeline for the reference. When ``None``, both the
            alignment and the framerate-ratio inference are skipped.
        result: Dict updated in place. On success it receives
            ``"offset_seconds"`` and ``"framerate_scale_factor"`` (values of
            the last processed input); ``"sync_was_successful"`` is written
            whenever the function returns.

    Returns:
        ``True`` when all inputs were processed, ``False`` when a
        ``FailedToFindAlignmentException`` was raised.

    Raises:
        Exception: Any other error propagates unchanged; in that case
            ``result["sync_was_successful"]`` is not written.
    """
    sync_was_successful = True
    try:
        logger.info(
            "extracting speech segments from %s...",
            "stdin"
            if not args.srtin else "subtitles file(s) {}".format(args.srtin),
        )
        if not args.srtin:
            # A single ``None`` entry makes the loop below run exactly once
            # when no subtitle paths were supplied.
            args.srtin = [None]
        for srtin in args.srtin:
            # Without a reference pipeline there is nothing to align against.
            skip_sync = args.skip_sync or reference_pipe is None
            skip_infer_framerate_ratio = (args.skip_infer_framerate_ratio
                                          or reference_pipe is None)
            srtout = srtin if args.overwrite_input else args.srtout
            srt_pipe_maker = get_srt_pipe_maker(args, srtin)
            framerate_ratios = get_framerate_ratios_to_try(args)
            srt_pipes = [srt_pipe_maker(1.0)
                         ] + [srt_pipe_maker(rat) for rat in framerate_ratios]
            for srt_pipe in srt_pipes:
                # Callable entries are left unfitted at this point.
                if not callable(srt_pipe):
                    srt_pipe.fit(srtin)
            if not skip_infer_framerate_ratio and hasattr(
                    reference_pipe[-1], "num_frames"):
                # Add one more candidate whose ratio comes from the relative
                # lengths (in frames) of the reference and the unscaled
                # subtitles.
                inferred_framerate_ratio_from_length = (
                    float(reference_pipe[-1].num_frames) /
                    cast(Pipeline, srt_pipes[0])[-1].num_frames)
                logger.info("inferred frameratio ratio: %.3f",
                            inferred_framerate_ratio_from_length)
                srt_pipes.append(
                    cast(Pipeline,
                         srt_pipe_maker(
                             inferred_framerate_ratio_from_length)).fit(srtin))
                logger.info("...done")
            logger.info("computing alignments...")
            if skip_sync:
                # No alignment: keep the unscaled pipeline with a zero offset.
                best_score = 0.0
                best_srt_pipe = cast(Pipeline, srt_pipes[0])
                offset_samples = 0
            else:
                (best_score, offset_samples), best_srt_pipe = MaxScoreAligner(
                    FFTAligner, srtin, SAMPLE_RATE,
                    args.max_offset_seconds).fit_transform(
                        reference_pipe.transform(args.reference),
                        srt_pipes,
                    )
            logger.info("...done")
            # Convert the sample offset to seconds and add the user-requested
            # shift.
            offset_seconds = (offset_samples / float(SAMPLE_RATE) +
                              args.apply_offset_seconds)
            scale_step = best_srt_pipe.named_steps["scale"]
            logger.info("score: %.3f", best_score)
            logger.info("offset seconds: %.3f", offset_seconds)
            logger.info("framerate scale factor: %.3f",
                        scale_step.scale_factor)
            output_steps: List[Tuple[str, TransformerMixin]] = [
                ("shift", SubtitleShifter(offset_seconds))
            ]
            if args.merge_with_reference:
                output_steps.append(
                    ("merge",
                     SubtitleMerger(
                         reference_pipe.named_steps["parse"].subs_)))
            output_pipe = Pipeline(output_steps)
            out_subs = output_pipe.fit_transform(scale_step.subs_)
            if args.output_encoding != "same":
                out_subs = out_subs.set_encoding(args.output_encoding)
            suppress_output_thresh = args.suppress_output_if_offset_less_than
            # NOTE(review): with a threshold set, output is written only when
            # the scale factor is exactly 1.0 AND the offset reaches the
            # threshold; the warning below mentions only the offset. Confirm
            # that suppressing rescaled output is intended.
            if suppress_output_thresh is None or (
                    scale_step.scale_factor == 1.0
                    and offset_seconds >= suppress_output_thresh):
                logger.info("writing output to {}".format(srtout or "stdout"))
                out_subs.write_file(srtout)
            else:
                logger.warning(
                    "suppressing output because offset %s was less than suppression threshold %s",
                    offset_seconds,
                    args.suppress_output_if_offset_less_than,
                )
    except FailedToFindAlignmentException:
        sync_was_successful = False
        logger.exception("failed to find alignment")
    else:
        # Values from the last loop iteration.
        result["offset_seconds"] = offset_seconds
        result["framerate_scale_factor"] = scale_step.scale_factor
    # Any other exception simply propagates with its original traceback.
    # The bookkeeping is deliberately outside a ``finally``: returning from
    # ``finally`` would silently discard in-flight exceptions such as
    # KeyboardInterrupt.
    result["sync_was_successful"] = sync_was_successful
    return sync_was_successful