Exemple #1
0
    def matched(self):
        """Return the merged guess for this tree, computing it on first use.

        The merged result is stored in ``self._matched_result``; whenever that
        attribute is truthy it is returned directly without recomputing.
        """
        if self._matched_result:
            return self._matched_result

        # The merge helpers work in place, so hand them shallow copies of the
        # guesses instead of the ones still attached to the tree nodes.
        guesses = [copy.copy(node.guess)
                   for node in self.nodes() if node.guess]

        # 1- merge similar information together and give it a higher
        #    confidence; each group of properties has its own chooser
        int_properties = ('year', 'season', 'episodeNumber')
        string_properties = ('title', 'series', 'container', 'format',
                             'releaseGroup', 'website', 'audioCodec',
                             'videoCodec', 'screenSize', 'episodeFormat',
                             'audioChannels', 'idNumber')

        for prop in int_properties:
            merge_similar_guesses(guesses, prop, choose_int)

        for prop in string_properties:
            merge_similar_guesses(guesses, prop, choose_string)

        # 2- merge whatever is left, potentially discarding information that
        #    was not properly merged by the previous step
        appendable = ['language', 'subtitleLanguage', 'other']
        merged = merge_all(guesses, append=appendable)

        log.debug('Final result: ' + merged.nice_string())
        self._matched_result = merged
        return self._matched_result
Exemple #2
0
    def matched(self):
        """Merge the guesses found on the match tree nodes into one result.

        Every node of ``self.match_tree`` that carries a guess contributes to
        the result returned by ``merge_all``.
        """
        # The merge functions work in place: deep-copy the guesses so that the
        # match tree itself is left untouched.
        guesses = copy.deepcopy(
            [node.guess for node in self.match_tree.nodes() if node.guess])

        # 1- merge similar information together and give it a higher
        #    confidence, pairing each chooser with the properties it settles
        merge_plan = (
            (choose_int, ('year', 'season', 'episodeNumber')),
            (choose_string, ('title', 'series', 'container', 'format',
                             'releaseGroup', 'website', 'audioCodec',
                             'videoCodec', 'screenSize', 'episodeFormat',
                             'audioChannels')),
        )
        for chooser, properties in merge_plan:
            for prop in properties:
                merge_similar_guesses(guesses, prop, chooser)

        # 2- merge the rest, potentially discarding information not properly
        #    merged before
        merged = merge_all(guesses,
                           append=['language', 'subtitleLanguage', 'other'])

        summary = merged.nice_string()
        log.debug('Final result: ' + summary)
        return merged
Exemple #3
0
    def matched(self):
        """Merge the guesses carried by this tree's nodes into one result.

        Nodes without a guess are ignored; the value returned is whatever
        ``merge_all`` produces from the remaining guesses.
        """
        # Work on a deep copy: the merge functions modify their input in place
        # and must not alter the guesses stored on the match tree.
        guesses = copy.deepcopy([node.guess for node in self.nodes() if node.guess])

        # 1- merge similar information together and give it a higher
        #    confidence
        int_properties = ("year", "season", "episodeNumber")
        string_properties = (
            "title",
            "series",
            "container",
            "format",
            "releaseGroup",
            "website",
            "audioCodec",
            "videoCodec",
            "screenSize",
            "episodeFormat",
            "audioChannels",
            "idNumber",
        )

        for prop in int_properties:
            merge_similar_guesses(guesses, prop, choose_int)

        for prop in string_properties:
            merge_similar_guesses(guesses, prop, choose_string)

        # 2- merge the rest, potentially discarding information not properly
        #    merged before
        appendable = ["language", "subtitleLanguage", "other"]
        merged = merge_all(guesses, append=appendable)

        log.debug("Final result: " + merged.nice_string())
        return merged
    def matched(self):
        """Merge ``self.parts`` into a single result after light clean-up.

        A trailing ",the" / ", the" on a series name is moved to the front as
        "The ", similar guesses are merged, and an episode whose
        ``episodeFormat`` is "Minisode" but which has no season gets season 0.
        """
        # The merge functions work in place, so operate on a deep copy and
        # leave self.parts untouched.
        guesses = copy.deepcopy(self.parts)

        # 1- common preprocessing

        # 1.1- move a trailing ", the" (with or without the space) to the
        #      front of the series title
        article_suffixes = (",the", ", the")
        for guess in guesses:
            if "series" in guess:
                series = guess["series"]
                lowered = series.lower()

                for suffix in article_suffixes:
                    if lowered.endswith(suffix):
                        guess["series"] = "The " + series[: -len(suffix)]

        # 2- merge similar information together and give it a higher confidence
        merge_plan = (
            (choose_int, ("year", "season", "episodeNumber")),
            (
                choose_string,
                (
                    "title",
                    "series",
                    "container",
                    "format",
                    "releaseGroup",
                    "website",
                    "audioCodec",
                    "videoCodec",
                    "screenSize",
                    "episodeFormat",
                ),
            ),
        )
        for chooser, properties in merge_plan:
            for prop in properties:
                merge_similar_guesses(guesses, prop, chooser)

        merged = merge_all(guesses, append=["language", "subtitleLanguage", "other"])

        # 3- last minute post-processing: a season-less Minisode goes to season 0
        if merged["type"] == "episode" and "season" not in merged:
            if merged.get("episodeFormat", "") == "Minisode":
                merged["season"] = 0

        log.debug("Final result: " + merged.nice_string())
        return merged
    def matched(self):
        """Merge ``self.parts`` into a single result after light clean-up.

        A trailing ',the' / ', the' on a series name is moved to the front as
        'The ', similar guesses are merged, and an episode whose
        ``episodeFormat`` is 'Minisode' but which has no season gets season 0.
        """
        # The merge functions work in place, so operate on a deep copy and
        # leave self.parts untouched.
        guesses = copy.deepcopy(self.parts)

        # 1- common preprocessing

        # 1.1- move a trailing ', the' (with or without the space) to the
        #      front of the series title
        for guess in guesses:
            if 'series' in guess:
                series = guess['series']
                lowered = series.lower()

                for suffix in (',the', ', the'):
                    if lowered.endswith(suffix):
                        guess['series'] = 'The ' + series[:-len(suffix)]

        # 2- merge similar information together and give it a higher confidence
        int_properties = ('year', 'season', 'episodeNumber')
        string_properties = ('title', 'series', 'container', 'format',
                             'releaseGroup', 'website', 'audioCodec',
                             'videoCodec', 'screenSize', 'episodeFormat')

        for prop in int_properties:
            merge_similar_guesses(guesses, prop, choose_int)

        for prop in string_properties:
            merge_similar_guesses(guesses, prop, choose_string)

        merged = merge_all(guesses,
                           append=['language', 'subtitleLanguage', 'other'])

        # 3- last minute post-processing: a season-less Minisode goes to season 0
        if merged['type'] == 'episode' and 'season' not in merged:
            if merged.get('episodeFormat', '') == 'Minisode':
                merged['season'] = 0

        log.debug('Final result: ' + merged.nice_string())
        return merged
    def matched(self):
        """Merge the guesses found on the match tree nodes into one result.

        Only nodes of ``self.match_tree`` that carry a guess take part; the
        value returned is the output of ``merge_all``.
        """
        # Deep-copy the guesses first: the merge functions work in place and
        # would otherwise modify the match tree.
        tree_guesses = [node.guess
                        for node in self.match_tree.nodes()
                        if node.guess]
        guesses = copy.deepcopy(tree_guesses)

        # 1- merge similar information together and give it a higher
        #    confidence
        int_properties = ('year', 'season', 'episodeNumber')
        string_properties = ('title', 'series', 'container', 'format',
                             'releaseGroup', 'website', 'audioCodec',
                             'videoCodec', 'screenSize', 'episodeFormat')

        for prop in int_properties:
            merge_similar_guesses(guesses, prop, choose_int)

        for prop in string_properties:
            merge_similar_guesses(guesses, prop, choose_string)

        # 2- merge everything that is left into the final result
        appendable = ['language', 'subtitleLanguage', 'other']
        merged = merge_all(guesses, append=appendable)

        log.debug('Final result: ' + merged.nice_string())
        return merged
Exemple #7
0
def guess_file_info(filename, filetype, info=None):
    """info can contain the names of the various plugins, such as 'filename' to
    detect filename info, or 'hash_md5' to get the md5 hash of the file.

    info may be a single name or a list of names and defaults to
    ['filename']. 'hash_mpc' and 'hash_ed2k' use the dedicated guessit
    hashers; any other 'hash_<name>' is looked up as a constructor in python's
    hashlib module. A hash that cannot be computed, or an unknown info type,
    is reported through a warning and skipped. A hashlib hash requested more
    than once is computed and reported only once.

    Returns the result of merge_all() over everything collected; when it holds
    both 'series' and 'country', the country code is appended to the series.

    >>> guess_file_info('tests/dummy.srt', 'autodetect', info = ['hash_md5', 'hash_sha1'])
    {'hash_md5': 'e781de9b94ba2753a8e2945b2c0a123d', 'hash_sha1': 'bfd18e2f4e5d59775c2bc14d80f56971891ed620'}
    """
    result = []
    # (infotype, hasher) pairs in request order, plus the set of names already
    # scheduled so that each hash gets exactly one hasher object
    hashers = []
    scheduled = set()

    # Force unicode as soon as possible
    filename = u(filename)

    if info is None:
        info = ['filename']

    if isinstance(info, base_text_type):
        info = [info]

    for infotype in info:
        if infotype == 'filename':
            result.append(_guess_filename(filename, filetype))

        elif infotype == 'hash_mpc':
            from guessit.hash_mpc import hash_file
            try:
                result.append(Guess({'hash_mpc': hash_file(filename)},
                                    confidence=1.0))
            except Exception as e:
                log.warning('Could not compute MPC-style hash because: %s' % e)

        elif infotype == 'hash_ed2k':
            from guessit.hash_ed2k import hash_file
            try:
                result.append(Guess({'hash_ed2k': hash_file(filename)},
                                    confidence=1.0))
            except Exception as e:
                log.warning('Could not compute ed2k hash because: %s' % e)

        elif infotype.startswith('hash_'):
            if infotype in scheduled:
                # A second hasher object for the same name would be reported
                # alongside the first one; one digest per name is enough.
                continue

            import hashlib
            hashname = infotype[5:]
            try:
                hasher = getattr(hashlib, hashname)()
            except AttributeError:
                log.warning('Could not compute %s hash because it is not available from python\'s hashlib module' % hashname)
            else:
                scheduled.add(infotype)
                hashers.append((infotype, hasher))

        else:
            log.warning('Invalid infotype: %s' % infotype)

    # do all the hashes now, but on a single pass
    if hashers:
        try:
            blocksize = 8192

            with open(filename, 'rb') as f:
                chunk = f.read(blocksize)
                while chunk:
                    # feed every scheduled hasher, so that each digest
                    # reported below covers the whole file
                    for _, hasher in hashers:
                        hasher.update(chunk)
                    chunk = f.read(blocksize)

            for infotype, hasher in hashers:
                result.append(Guess({infotype: hasher.hexdigest()},
                                    confidence=1.0))
        except Exception as e:
            log.warning('Could not compute hash because: %s' % e)

    result = merge_all(result)

    # last minute adjustments

    # if country is in the guessed properties, make it part of the series name
    if 'series' in result and 'country' in result:
        result['series'] += ' (%s)' % result['country'].alpha2.upper()

    return result
Exemple #8
0
def guess_file_info(filename, filetype, info=None):
    """info can contain the names of the various plugins, such as 'filename' to
    detect filename info, or 'hash_md5' to get the md5 hash of the file.

    info may be a single name or a list of names and defaults to
    ['filename']. 'hash_mpc' and 'hash_ed2k' use the dedicated guessit
    hashers; any other 'hash_<name>' is looked up as a constructor in python's
    hashlib module. A hash that cannot be computed, or an unknown info type,
    is reported through a warning and skipped. A hashlib hash requested more
    than once is computed and reported only once.

    Returns the result of merge_all() over everything collected; when it holds
    both 'series' and 'country', the country code is appended to the series.

    >>> guess_file_info('tests/dummy.srt', 'autodetect', info = ['hash_md5', 'hash_sha1'])
    {'hash_md5': 'e781de9b94ba2753a8e2945b2c0a123d', 'hash_sha1': 'bfd18e2f4e5d59775c2bc14d80f56971891ed620'}
    """
    result = []
    # (infotype, hasher) pairs in request order, plus the set of names already
    # scheduled so that each hash gets exactly one hasher object
    hashers = []
    scheduled = set()

    # Force unicode as soon as possible
    filename = u(filename)

    if info is None:
        info = ['filename']

    if isinstance(info, base_text_type):
        info = [info]

    for infotype in info:
        if infotype == 'filename':
            result.append(_guess_filename(filename, filetype))

        elif infotype == 'hash_mpc':
            from guessit.hash_mpc import hash_file

            try:
                result.append(Guess({'hash_mpc': hash_file(filename)},
                                    confidence=1.0))
            except Exception as e:
                log.warning('Could not compute MPC-style hash because: %s' % e)

        elif infotype == 'hash_ed2k':
            from guessit.hash_ed2k import hash_file

            try:
                result.append(Guess({'hash_ed2k': hash_file(filename)},
                                    confidence=1.0))
            except Exception as e:
                log.warning('Could not compute ed2k hash because: %s' % e)

        elif infotype.startswith('hash_'):
            if infotype in scheduled:
                # A second hasher object for the same name would be reported
                # alongside the first one; one digest per name is enough.
                continue

            import hashlib

            hashname = infotype[5:]
            try:
                hasher = getattr(hashlib, hashname)()
            except AttributeError:
                log.warning('Could not compute %s hash because it is not available from python\'s hashlib module' % hashname)
            else:
                scheduled.add(infotype)
                hashers.append((infotype, hasher))

        else:
            log.warning('Invalid infotype: %s' % infotype)

    # do all the hashes now, but on a single pass
    if hashers:
        try:
            blocksize = 8192

            with open(filename, 'rb') as f:
                chunk = f.read(blocksize)
                while chunk:
                    # feed every scheduled hasher, so that each digest
                    # reported below covers the whole file
                    for _, hasher in hashers:
                        hasher.update(chunk)
                    chunk = f.read(blocksize)

            for infotype, hasher in hashers:
                result.append(Guess({infotype: hasher.hexdigest()},
                                    confidence=1.0))
        except Exception as e:
            log.warning('Could not compute hash because: %s' % e)

    result = merge_all(result)

    # last minute adjustments

    # if country is in the guessed properties, make it part of the series name
    if 'series' in result and 'country' in result:
        result['series'] += ' (%s)' % result['country'].alpha2.upper()

    return result
Exemple #9
0
    # do all the hashes now, but on a single pass
    if hashers:
        try:
            blocksize = 8192
            hasherobjs = dict(hashers).values()

            with open(filename, 'rb') as f:
                for chunk in iter(lambda: f.read(blocksize), ''):
                    for hasher in hasherobjs:
                        hasher.update(chunk)

            for infotype, hasher in hashers:
                result.append(Guess({infotype: hasher.hexdigest()},
                                    confidence=1.0))
        except Exception, e:
            log.warning('Could not compute hash because: %s' % e)

    return merge_all(result)


def guess_video_info(filename, info=None):
    """Call guess_file_info() with 'autodetect' as its second argument.

    ``filename`` and ``info`` are forwarded unchanged.
    """
    filetype = 'autodetect'
    return guess_file_info(filename, filetype, info)


def guess_movie_info(filename, info=None):
    """Call guess_file_info() with 'movie' as its second argument.

    ``filename`` and ``info`` are forwarded unchanged.
    """
    filetype = 'movie'
    return guess_file_info(filename, filetype, info)


def guess_episode_info(filename, info=None):
    """Call guess_file_info() with 'episode' as its second argument.

    ``filename`` and ``info`` are forwarded unchanged.
    """
    filetype = 'episode'
    return guess_file_info(filename, filetype, info)
Exemple #10
0
def guess_file_info(filename, info=None, options=None, **kwargs):
    """info can contain the names of the various plugins, such as 'filename' to
    detect filename info, or 'hash_md5' to get the md5 hash of the file.

    info may be a single name or a list of names and defaults to 'filename'.
    'hash_mpc' and 'hash_ed2k' use the dedicated guessit hashers; any other
    'hash_<name>' is looked up as a constructor in python's hashlib module.
    A hash that cannot be computed, or an unknown info type, is reported
    through a warning and skipped. A hashlib hash requested more than once is
    computed and reported only once.

    options (an empty dict when not given) and any extra keyword arguments are
    passed on to _guess_filename(). Returns the result of merge_all() over
    everything collected.

    >>> testfile = os.path.join(os.path.dirname(__file__), 'test/dummy.srt')
    >>> g = guess_file_info(testfile, info = ['hash_md5', 'hash_sha1'])
    >>> g['hash_md5'], g['hash_sha1']
    ('64de6b5893cac24456c46a935ef9c359', 'a703fc0fa4518080505809bf562c6fc6f7b3c98c')
    """
    info = info or 'filename'
    options = options or {}

    result = []
    # (infotype, hasher) pairs in request order, plus the set of names already
    # scheduled so that each hash gets exactly one hasher object
    hashers = []
    scheduled = set()

    # Force unicode as soon as possible
    filename = u(filename)

    if isinstance(info, base_text_type):
        info = [info]

    for infotype in info:
        if infotype == 'filename':
            result.append(_guess_filename(filename, options, **kwargs))

        elif infotype == 'hash_mpc':
            from guessit.hash_mpc import hash_file
            try:
                result.append(Guess({infotype: hash_file(filename)},
                                    confidence=1.0))
            except Exception as e:
                log.warning('Could not compute MPC-style hash because: %s' % e)

        elif infotype == 'hash_ed2k':
            from guessit.hash_ed2k import hash_file
            try:
                result.append(Guess({infotype: hash_file(filename)},
                                    confidence=1.0))
            except Exception as e:
                log.warning('Could not compute ed2k hash because: %s' % e)

        elif infotype.startswith('hash_'):
            if infotype in scheduled:
                # A second hasher object for the same name would be reported
                # alongside the first one; one digest per name is enough.
                continue

            import hashlib
            hashname = infotype[5:]
            try:
                hasher = getattr(hashlib, hashname)()
            except AttributeError:
                log.warning('Could not compute %s hash because it is not available from python\'s hashlib module' % hashname)
            else:
                scheduled.add(infotype)
                hashers.append((infotype, hasher))

        else:
            log.warning('Invalid infotype: %s' % infotype)

    # do all the hashes now, but on a single pass
    if hashers:
        try:
            blocksize = 8192

            with open(filename, 'rb') as f:
                chunk = f.read(blocksize)
                while chunk:
                    # feed every scheduled hasher, so that each digest
                    # reported below covers the whole file
                    for _, hasher in hashers:
                        hasher.update(chunk)
                    chunk = f.read(blocksize)

            for infotype, hasher in hashers:
                result.append(Guess({infotype: hasher.hexdigest()},
                                    confidence=1.0))
        except Exception as e:
            log.warning('Could not compute hash because: %s' % e)

    result = merge_all(result)

    return result
Exemple #11
0
def guess_file_info(filename, info=None, options=None, **kwargs):
    """info can contain the names of the various plugins, such as 'filename' to
    detect filename info, or 'hash_md5' to get the md5 hash of the file.

    info may be a single name or a list of names and defaults to 'filename'.
    'hash_mpc' and 'hash_ed2k' use the dedicated guessit hashers; any other
    'hash_<name>' is looked up as a constructor in python's hashlib module.
    A hash that cannot be computed, or an unknown info type, is reported
    through a warning and skipped. A hashlib hash requested more than once is
    computed and reported only once.

    options (an empty dict when not given) and any extra keyword arguments are
    passed on to _guess_filename(). Returns the result of merge_all() over
    everything collected.

    >>> testfile = os.path.join(os.path.dirname(__file__), 'test/dummy.srt')
    >>> g = guess_file_info(testfile, info = ['hash_md5', 'hash_sha1'])
    >>> g['hash_md5'], g['hash_sha1']
    ('64de6b5893cac24456c46a935ef9c359', 'a703fc0fa4518080505809bf562c6fc6f7b3c98c')
    """
    info = info or 'filename'
    options = options or {}

    result = []
    # (infotype, hasher) pairs in request order, plus the set of names already
    # scheduled so that each hash gets exactly one hasher object
    hashers = []
    scheduled = set()

    # Force unicode as soon as possible
    filename = u(filename)

    if isinstance(info, base_text_type):
        info = [info]

    for infotype in info:
        if infotype == 'filename':
            result.append(_guess_filename(filename, options, **kwargs))

        elif infotype == 'hash_mpc':
            from guessit.hash_mpc import hash_file
            try:
                result.append(
                    Guess({infotype: hash_file(filename)}, confidence=1.0))
            except Exception as e:
                log.warning('Could not compute MPC-style hash because: %s' % e)

        elif infotype == 'hash_ed2k':
            from guessit.hash_ed2k import hash_file
            try:
                result.append(
                    Guess({infotype: hash_file(filename)}, confidence=1.0))
            except Exception as e:
                log.warning('Could not compute ed2k hash because: %s' % e)

        elif infotype.startswith('hash_'):
            if infotype in scheduled:
                # A second hasher object for the same name would be reported
                # alongside the first one; one digest per name is enough.
                continue

            import hashlib
            hashname = infotype[5:]
            try:
                hasher = getattr(hashlib, hashname)()
            except AttributeError:
                log.warning(
                    'Could not compute %s hash because it is not available from python\'s hashlib module'
                    % hashname)
            else:
                scheduled.add(infotype)
                hashers.append((infotype, hasher))

        else:
            log.warning('Invalid infotype: %s' % infotype)

    # do all the hashes now, but on a single pass
    if hashers:
        try:
            blocksize = 8192

            with open(filename, 'rb') as f:
                chunk = f.read(blocksize)
                while chunk:
                    # feed every scheduled hasher, so that each digest
                    # reported below covers the whole file
                    for _, hasher in hashers:
                        hasher.update(chunk)
                    chunk = f.read(blocksize)

            for infotype, hasher in hashers:
                result.append(
                    Guess({infotype: hasher.hexdigest()}, confidence=1.0))
        except Exception as e:
            log.warning('Could not compute hash because: %s' % e)

    result = merge_all(result)

    return result
Exemple #12
0
    # do all the hashes now, but on a single pass
    if hashers:
        try:
            blocksize = 8192
            hasherobjs = dict(hashers).values()

            with open(filename, 'rb') as f:
                for chunk in iter(lambda: f.read(blocksize), ''):
                    for hasher in hasherobjs:
                        hasher.update(chunk)

            for infotype, hasher in hashers:
                result.append(
                    Guess({infotype: hasher.hexdigest()}, confidence=1.0))
        except Exception, e:
            log.warning('Could not compute hash because: %s' % e)

    return merge_all(result)


def guess_video_info(filename, info=None):
    """Call guess_file_info() with 'autodetect' as its second argument.

    ``filename`` and ``info`` are forwarded unchanged.
    """
    filetype = 'autodetect'
    return guess_file_info(filename, filetype, info)


def guess_movie_info(filename, info=None):
    """Call guess_file_info() with 'movie' as its second argument.

    ``filename`` and ``info`` are forwarded unchanged.
    """
    filetype = 'movie'
    return guess_file_info(filename, filetype, info)


def guess_episode_info(filename, info=None):
    """Call guess_file_info() with 'episode' as its second argument.

    ``filename`` and ``info`` are forwarded unchanged.
    """
    filetype = 'episode'
    return guess_file_info(filename, filetype, info)