Ejemplo n.º 1
0
def download(obj, provider, language, age, directory, encoding, single, force, hearing_impaired, min_score, max_workers,
             verbose, path):
    """Download best subtitles.

    PATH can be an directory containing videos, a video file path or a video file name. It can be used multiple times.

    If an existing subtitle is detected (external or embedded) in the correct language, the download is skipped for
    the associated video.

    """
    # process parameters
    language = set(language)

    # scan videos
    videos = []
    ignored_videos = []
    errored_paths = []
    with click.progressbar(path, label='Collecting videos', item_show_func=lambda p: p or '') as bar:
        for p in bar:
            logger.debug('Collecting path %s', p)

            # non-existing
            if not os.path.exists(p):
                try:
                    video = Video.fromname(p)
                except:
                    logger.exception('Unexpected error while collecting non-existing path %s', p)
                    errored_paths.append(p)
                    continue
                videos.append(video)
                continue

            # directories
            if os.path.isdir(p):
                try:
                    scanned_videos = scan_videos(p, subtitles=not force, embedded_subtitles=not force,
                                                 subtitles_dir=directory, age=age)
                except:
                    logger.exception('Unexpected error while collecting directory path %s', p)
                    errored_paths.append(p)
                    continue
                for video in scanned_videos:
                    if check_video(video, languages=language, age=age, undefined=single):
                        videos.append(video)
                    else:
                        ignored_videos.append(video)
                continue

            # other inputs
            try:
                video = scan_video(p, subtitles=not force, embedded_subtitles=not force, subtitles_dir=directory)
            except:
                logger.exception('Unexpected error while collecting path %s', p)
                errored_paths.append(p)
                continue
            if check_video(video, languages=language, age=age, undefined=single):
                videos.append(video)
            else:
                ignored_videos.append(video)

    # output errored paths
    if verbose > 0:
        for p in errored_paths:
            click.secho('%s errored' % p, fg='red')

    # output ignored videos
    if verbose > 1:
        for video in ignored_videos:
            click.secho('%s ignored - subtitles: %s / age: %d day%s' % (
                os.path.split(video.name)[1],
                ', '.join(str(s) for s in video.subtitle_languages) or 'none',
                video.age.days,
                's' if video.age.days > 1 else ''
            ), fg='yellow')

    # report collected videos
    click.echo('%s video%s collected / %s video%s ignored / %s error%s' % (
        click.style(str(len(videos)), bold=True, fg='green' if videos else None),
        's' if len(videos) > 1 else '',
        click.style(str(len(ignored_videos)), bold=True, fg='yellow' if ignored_videos else None),
        's' if len(ignored_videos) > 1 else '',
        click.style(str(len(errored_paths)), bold=True, fg='red' if errored_paths else None),
        's' if len(errored_paths) > 1 else '',
    ))

    # exit if no video collected
    if not videos:
        return

    # download best subtitles
    downloaded_subtitles = defaultdict(list)
    with AsyncProviderPool(max_workers=max_workers, providers=provider, provider_configs=obj['provider_configs']) as p:
        with click.progressbar(videos, label='Downloading subtitles',
                               item_show_func=lambda v: os.path.split(v.name)[1] if v is not None else '') as bar:
            for v in bar:
                scores = get_scores(v)
                subtitles = p.download_best_subtitles(p.list_subtitles(v, language - v.subtitle_languages),
                                                      v, language, min_score=scores['hash'] * min_score / 100,
                                                      hearing_impaired=hearing_impaired, only_one=single)
                downloaded_subtitles[v] = subtitles

    # save subtitles
    total_subtitles = 0
    for v, subtitles in downloaded_subtitles.items():
        saved_subtitles = save_subtitles(v, subtitles, single=single, directory=directory, encoding=encoding)
        total_subtitles += len(saved_subtitles)

        if verbose > 0:
            click.echo('%s subtitle%s downloaded for %s' % (click.style(str(len(saved_subtitles)), bold=True),
                                                            's' if len(saved_subtitles) > 1 else '',
                                                            os.path.split(v.name)[1]))

        if verbose > 1:
            for s in saved_subtitles:
                matches = s.get_matches(v)
                score = compute_score(s, v)

                # score color
                score_color = None
                scores = get_scores(v)
                if isinstance(v, Movie):
                    if score < scores['title']:
                        score_color = 'red'
                    elif score < scores['title'] + scores['year'] + scores['release_group']:
                        score_color = 'yellow'
                    else:
                        score_color = 'green'
                elif isinstance(v, Episode):
                    if score < scores['series'] + scores['season'] + scores['episode']:
                        score_color = 'red'
                    elif score < scores['series'] + scores['season'] + scores['episode'] + scores['release_group']:
                        score_color = 'yellow'
                    else:
                        score_color = 'green'

                # scale score from 0 to 100 taking out preferences
                scaled_score = score
                if s.hearing_impaired == hearing_impaired:
                    scaled_score -= scores['hearing_impaired']
                scaled_score *= 100 / scores['hash']

                # echo some nice colored output
                click.echo('  - [{score}] {language} subtitle from {provider_name} (match on {matches})'.format(
                    score=click.style('{:5.1f}'.format(scaled_score), fg=score_color, bold=score >= scores['hash']),
                    language=s.language.name if s.language.country is None else '%s (%s)' % (s.language.name,
                                                                                             s.language.country.name),
                    provider_name=s.provider_name,
                    matches=', '.join(sorted(matches, key=scores.get, reverse=True))
                ))

    if verbose == 0:
        click.echo('Downloaded %s subtitle%s' % (click.style(str(total_subtitles), bold=True),
                                                 's' if total_subtitles > 1 else ''))
Ejemplo n.º 2
0
def compute_score(matches,
                  subtitle,
                  video,
                  hearing_impaired=None,
                  score_obj=None):
    """Compute the score of the `subtitle` against the `video` with `hearing_impaired` preference.

    patch:
        - remove upper bounds of score
        - re-add matches argument and remove get_matches from here

    :func:`compute_score` uses the :meth:`Subtitle.get_matches <subliminal.subtitle.Subtitle.get_matches>` method and
    applies the scores (either from :data:`episode_scores` or :data:`movie_scores`) after some processing.

    :param subtitle: the subtitle to compute the score of.
    :type subtitle: :class:`~subliminal.subtitle.Subtitle`
    :param video: the video to compute the score against.
    :type video: :class:`~subliminal.video.Video`
    :param bool hearing_impaired: hearing impaired preference.
    :return: score of the subtitle.
    :rtype: int

    """
    logger.info('%r: Computing score for video %r with %r', subtitle, video,
                dict(hearing_impaired=hearing_impaired))

    if score_obj is not None:
        scores = score_obj.scores
        score_obj.check_custom_profiles(subtitle, matches)
    else:
        scores = get_scores(video)

    is_episode = isinstance(video, Episode)
    is_movie = isinstance(video, Movie)

    episode_hash_valid_if = {"series", "season", "episode", "source"}
    movie_hash_valid_if = {"video_codec", "source"}

    orig_matches = matches.copy()

    # on hash match, discard everything else
    if subtitle.hash_verifiable and 'hash' in matches:
        # hash is error-prone, try to fix that
        hash_valid_if = episode_hash_valid_if if is_episode else movie_hash_valid_if

        # don't validate hashes of specials, as season and episode tend to be wrong
        if is_movie or not video.is_special:
            if hash_valid_if <= set(matches):
                # series, season and episode matched, hash is valid
                logger.debug(
                    '%r: Using valid hash, as %s are correct (%r) and (%r)',
                    subtitle, hash_valid_if, matches, video)
                matches &= {'hash'}
            else:
                # no match, invalidate hash
                logger.debug(
                    '%r: Ignoring hash as other matches are wrong (missing: %r) and (%r)',
                    subtitle, hash_valid_if - matches, video)
                matches -= {"hash"}
    elif 'hash' in matches:
        logger.debug('%r: Hash not verifiable for this provider. Keeping it',
                     subtitle)
        matches &= {'hash'}

    # handle equivalent matches
    eq_matches = set()
    if is_episode:
        _episode_checks(video, eq_matches, matches)
    elif is_movie and 'imdb_id' in matches:
        logger.debug('Adding imdb_id match equivalents')
        eq_matches |= {'title', 'year'}

    matches |= eq_matches

    # handle hearing impaired
    if hearing_impaired is not None and subtitle.hearing_impaired == hearing_impaired:
        logger.debug('Matched hearing_impaired')
        matches.add('hearing_impaired')
        orig_matches.add('hearing_impaired')

    # compute the score
    score = sum((scores.get(match, 0) for match in matches))
    logger.info('%r: Computed score %r with final matches %r', subtitle, score,
                matches)

    score_without_hash = sum((scores.get(match, 0)
                              for match in orig_matches | eq_matches
                              if match != "hash"))

    return score, score_without_hash
Ejemplo n.º 3
0
def download(obj, provider, language, age, directory, encoding, single, force,
             hearing_impaired, min_score, max_workers, verbose, path):
    """Download best subtitles.

    PATH can be an directory containing videos, a video file path or a video file name. It can be used multiple times.

    If an existing subtitle is detected (external or embedded) in the correct language, the download is skipped for
    the associated video.

    """
    # process parameters
    language = set(language)

    # scan videos
    videos = []
    ignored_videos = []
    errored_paths = []
    with click.progressbar(path,
                           label='Collecting videos',
                           item_show_func=lambda p: p or '') as bar:
        for p in bar:
            logger.debug('Collecting path %s', p)

            # non-existing
            if not os.path.exists(p):
                try:
                    video = Video.fromname(p)
                except:
                    logger.exception(
                        'Unexpected error while collecting non-existing path %s',
                        p)
                    errored_paths.append(p)
                    continue
                videos.append(video)
                continue

            # directories
            if os.path.isdir(p):
                try:
                    scanned_videos = scan_videos(p,
                                                 subtitles=not force,
                                                 embedded_subtitles=not force,
                                                 subtitles_dir=directory,
                                                 age=age)
                except:
                    logger.exception(
                        'Unexpected error while collecting directory path %s',
                        p)
                    errored_paths.append(p)
                    continue
                for video in scanned_videos:
                    if check_video(video,
                                   languages=language,
                                   age=age,
                                   undefined=single):
                        videos.append(video)
                    else:
                        ignored_videos.append(video)
                continue

            # other inputs
            try:
                video = scan_video(p,
                                   subtitles=not force,
                                   embedded_subtitles=not force,
                                   subtitles_dir=directory)
            except:
                logger.exception('Unexpected error while collecting path %s',
                                 p)
                errored_paths.append(p)
                continue
            if check_video(video,
                           languages=language,
                           age=age,
                           undefined=single):
                videos.append(video)
            else:
                ignored_videos.append(video)

    # output errored paths
    if verbose > 0:
        for p in errored_paths:
            click.secho('%s errored' % p, fg='red')

    # output ignored videos
    if verbose > 1:
        for video in ignored_videos:
            click.secho(
                '%s ignored - subtitles: %s / age: %d day%s' %
                (os.path.split(video.name)[1],
                 ', '.join(str(s) for s in video.subtitle_languages)
                 or 'none', video.age.days, 's' if video.age.days > 1 else ''),
                fg='yellow')

    # report collected videos
    click.echo('%s video%s collected / %s video%s ignored / %s error%s' % (
        click.style(
            str(len(videos)), bold=True, fg='green' if videos else None),
        's' if len(videos) > 1 else '',
        click.style(str(len(ignored_videos)),
                    bold=True,
                    fg='yellow' if ignored_videos else None),
        's' if len(ignored_videos) > 1 else '',
        click.style(str(len(errored_paths)),
                    bold=True,
                    fg='red' if errored_paths else None),
        's' if len(errored_paths) > 1 else '',
    ))

    # exit if no video collected
    if not videos:
        return

    # download best subtitles
    downloaded_subtitles = defaultdict(list)
    with AsyncProviderPool(max_workers=max_workers,
                           providers=provider,
                           provider_configs=obj['provider_configs']) as p:
        with click.progressbar(
                videos,
                label='Downloading subtitles',
                item_show_func=lambda v: os.path.split(v.name)[1]
                if v is not None else '') as bar:
            for v in bar:
                scores = get_scores(v)
                subtitles = p.download_best_subtitles(
                    p.list_subtitles(v, language - v.subtitle_languages),
                    v,
                    language,
                    min_score=scores['hash'] * min_score / 100,
                    hearing_impaired=hearing_impaired,
                    only_one=single)
                downloaded_subtitles[v] = subtitles

    # save subtitles
    total_subtitles = 0
    for v, subtitles in downloaded_subtitles.items():
        saved_subtitles = save_subtitles(v,
                                         subtitles,
                                         single=single,
                                         directory=directory,
                                         encoding=encoding)
        total_subtitles += len(saved_subtitles)

        if verbose > 0:
            click.echo(
                '%s subtitle%s downloaded for %s' %
                (click.style(str(len(saved_subtitles)), bold=True), 's' if
                 len(saved_subtitles) > 1 else '', os.path.split(v.name)[1]))

        if verbose > 1:
            for s in saved_subtitles:
                matches = s.get_matches(v)
                score = compute_score(s, v)

                # score color
                score_color = None
                scores = get_scores(v)
                if isinstance(v, Movie):
                    if score < scores['title']:
                        score_color = 'red'
                    elif score < scores['title'] + scores['year'] + scores[
                            'release_group']:
                        score_color = 'yellow'
                    else:
                        score_color = 'green'
                elif isinstance(v, Episode):
                    if score < scores['series'] + scores['season'] + scores[
                            'episode']:
                        score_color = 'red'
                    elif score < scores['series'] + scores['season'] + scores[
                            'episode'] + scores['release_group']:
                        score_color = 'yellow'
                    else:
                        score_color = 'green'

                # scale score from 0 to 100 taking out preferences
                scaled_score = score
                if s.hearing_impaired == hearing_impaired:
                    scaled_score -= scores['hearing_impaired']
                scaled_score *= 100 / scores['hash']

                # echo some nice colored output
                click.echo(
                    '  - [{score}] {language} subtitle from {provider_name} (match on {matches})'
                    .format(score=click.style('{:5.1f}'.format(scaled_score),
                                              fg=score_color,
                                              bold=score >= scores['hash']),
                            language=s.language.name
                            if s.language.country is None else '%s (%s)' %
                            (s.language.name, s.language.country.name),
                            provider_name=s.provider_name,
                            matches=', '.join(
                                sorted(matches, key=scores.get,
                                       reverse=True))))

    if verbose == 0:
        click.echo('Downloaded %s subtitle%s' %
                   (click.style(str(total_subtitles), bold=True),
                    's' if total_subtitles > 1 else ''))