Example #1
0
def main(args=None, setup_logging=True):
    """Entry point of the GuessIt command-line interface.

    :param args: list of command-line arguments to parse. When it is empty
        or ``None`` the option parser is invoked without explicit arguments.
    :param setup_logging: when true, logging is configured through
        ``guessit.slogging`` before anything else happens.

    Depending on the parsed options, this displays the properties/values or
    the transformers, runs the demo, prints the version banner, and then
    calls either ``submit_bug`` or ``guess_file`` for every non-empty
    filename given as an option and/or listed (one per line) in the input
    file. When none of these actions ran, the help text is printed.
    """
    if setup_logging:
        from guessit import slogging
        slogging.setup_logging()

    if PY2:  # pragma: no cover
        import codecs
        import locale
        import sys

        # see http://bugs.python.org/issue2128
        if os.name == 'nt':
            for i, a in enumerate(sys.argv):
                sys.argv[i] = a.decode(locale.getpreferredencoding())

        # see https://github.com/wackou/guessit/issues/43
        # and http://stackoverflow.com/questions/4545661/unicodedecodeerror-when-redirecting-to-file
        # Wrap sys.stdout into a StreamWriter to allow writing unicode.
        sys.stdout = codecs.getwriter(locale.getpreferredencoding())(
            sys.stdout)

    # Needed for guessit.plugins.transformers.reload() to be called.
    # The import is done only for its side effect; the name is not used.
    from guessit.plugins import transformers

    if args:
        options = get_opts().parse_args(args)
    else:  # pragma: no cover
        options = get_opts().parse_args()
    if options.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    help_required = True
    if options.properties or options.values:
        display_properties(options)
        help_required = False
    elif options.transformers:
        display_transformers()
        help_required = False

    if options.demo:
        run_demo(episodes=True, movies=True, options=vars(options))
        help_required = False

    if options.version:
        print('+-------------------------------------------------------+')
        print('+                   GuessIt ' + __version__ +
              (28 - len(__version__)) * ' ' + '+')
        print('+-------------------------------------------------------+')
        print('|      Please report any bug or feature request at      |')
        print('|       https://github.com/wackou/guessit/issues.       |')
        print('+-------------------------------------------------------+')
        help_required = False

    if options.yaml:
        try:
            import yaml
            import babelfish

            def default_representer(dumper, data):
                # Dump babelfish objects as their plain string form.
                return dumper.represent_str(str(data))

            yaml.SafeDumper.add_representer(babelfish.Language,
                                            default_representer)
            yaml.SafeDumper.add_representer(babelfish.Country,
                                            default_representer)
        except ImportError:  # pragma: no cover
            print('PyYAML not found. Using default output.')

    filenames = []
    if options.filename:
        filenames.extend(options.filename)
    if options.input_file:
        # The context manager closes the file even if reading fails.
        with open(options.input_file, 'r') as input_file:
            filenames.extend(line.strip() for line in input_file)

    # Drop empty entries (e.g. blank lines of the input file). A real list is
    # built on purpose: on Python 3 ``filter()`` returns an iterator that is
    # always truthy, which would make the emptiness test below meaningless.
    filenames = [f for f in filenames if f]

    if filenames:
        help_required = False
        if options.submit_bug:
            for filename in filenames:
                submit_bug(filename, options)
        else:
            for filename in filenames:
                guess_file(filename,
                           info=options.info.split(','),
                           options=vars(options))

    if help_required:  # pragma: no cover
        get_opts().print_help()
Example #2
0
def guess_file_info(filename, info=None, options=None, **kwargs):
    """Gather the requested kinds of information about a file and merge them.

    info can contain the names of the various plugins, such as 'filename' to
    detect filename info, or 'hash_md5' to get the md5 hash of the file.

    >>> testfile = os.path.join(os.path.dirname(__file__), 'test/dummy.srt')
    >>> g = guess_file_info(testfile, info = ['hash_md5', 'hash_sha1'])
    >>> g['hash_md5'], g['hash_sha1']
    ('64de6b5893cac24456c46a935ef9c359', 'a703fc0fa4518080505809bf562c6fc6f7b3c98c')

    :param filename: path of the file to inspect; converted with ``u()``.
    :param info: a single infotype or a list of infotypes. Recognised values
        are 'filename', 'video', 'hash_mpc', 'hash_ed2k' and 'hash_<name>'
        where ``<name>`` is a constructor of the ``hashlib`` module.
        Defaults to 'filename'. Unknown infotypes are logged and ignored.
    :param options: a dict of options or a command-line style string that is
        parsed with ``get_opts()``. The module-level ``default_options``, when
        set, are used as a base that these options override.
    :param kwargs: forwarded to ``_guess_filename``.
    :return: the result of ``smart_merge`` applied to all collected guesses.
    """
    info = info or 'filename'
    options = options or {}

    if isinstance(options, base_text_type):
        options = vars(get_opts().parse_args(shlex.split(options)))
    if default_options:
        if isinstance(default_options, base_text_type):
            default_args = shlex.split(default_options)
            merged_options = vars(get_opts().parse_args(default_args))
        else:
            # Copy so that the shared defaults are never modified in place.
            merged_options = deepcopy(default_options)
        merged_options.update(options)
        options = merged_options

    result = []
    # Ordered list of (infotype, hasher) pairs, one per distinct infotype.
    hashers = []

    # Force unicode as soon as possible
    filename = u(filename)

    if isinstance(info, base_text_type):
        info = [info]

    for infotype in info:
        if infotype == 'filename':
            result.append(_guess_filename(filename, options, **kwargs))

        elif infotype == 'hash_mpc':
            from guessit.hash_mpc import hash_file
            try:
                result.append(
                    Guess({infotype: hash_file(filename)}, confidence=1.0))
            except Exception as e:
                log.warning('Could not compute MPC-style hash because: %s', e)

        elif infotype == 'hash_ed2k':
            from guessit.hash_ed2k import hash_file
            try:
                result.append(
                    Guess({infotype: hash_file(filename)}, confidence=1.0))
            except Exception as e:
                log.warning('Could not compute ed2k hash because: %s', e)

        elif infotype.startswith('hash_'):
            if any(name == infotype for name, _ in hashers):
                # Requested twice: a second hasher would never be fed any
                # data and would report the digest of an empty input.
                continue

            import hashlib
            hashname = infotype[5:]
            try:
                hasher = getattr(hashlib, hashname)()
            except (AttributeError, TypeError):
                # AttributeError: hashlib has no such name.
                # TypeError: the name exists but is not a constructor that
                # can be called without arguments (e.g. 'new').
                log.warning(
                    'Could not compute %s hash because it is not available from python\'s hashlib module',
                    hashname)
            else:
                hashers.append((infotype, hasher))

        elif infotype == 'video':
            g = guess_video_metadata(filename)
            if g:
                result.append(g)

        else:
            log.warning('Invalid infotype: %s', infotype)

    # do all the hashes now, but on a single pass
    if hashers:
        try:
            blocksize = 8192

            with open(filename, 'rb') as f:
                chunk = f.read(blocksize)
                while chunk:
                    for _, hasher in hashers:
                        hasher.update(chunk)
                    chunk = f.read(blocksize)

            for infotype, hasher in hashers:
                result.append(
                    Guess({infotype: hasher.hexdigest()}, confidence=1.0))
        except Exception as e:
            log.warning('Could not compute hash because: %s', e)

    result = smart_merge(result)

    return result
Example #3
0
def guess_file_info(filename, info=None, options=None, **kwargs):
    """Collect several kinds of information about a file into one guess.

    info can contain the names of the various plugins, such as 'filename' to
    detect filename info, or 'hash_md5' to get the md5 hash of the file.

    >>> testfile = os.path.join(os.path.dirname(__file__), 'test/dummy.srt')
    >>> g = guess_file_info(testfile, info = ['hash_md5', 'hash_sha1'])
    >>> g['hash_md5'], g['hash_sha1']
    ('64de6b5893cac24456c46a935ef9c359', 'a703fc0fa4518080505809bf562c6fc6f7b3c98c')

    :param filename: path of the file to inspect; converted with ``u()``.
    :param info: one infotype or a list of them, among 'filename', 'video',
        'hash_mpc', 'hash_ed2k' and 'hash_<name>' (``<name>`` being a
        constructor of the ``hashlib`` module). Defaults to 'filename'.
        Anything else is reported with a warning and skipped.
    :param options: options as a dict, or as a command-line style string
        parsed with ``get_opts()``. When the module-level ``default_options``
        is set, it provides base values that these options override.
    :param kwargs: passed through to ``_guess_filename``.
    :return: what ``smart_merge`` returns for the list of collected guesses.
    """
    info = info or 'filename'
    options = options or {}

    if isinstance(options, base_text_type):
        options = vars(get_opts().parse_args(shlex.split(options)))
    if default_options:
        if isinstance(default_options, base_text_type):
            default_args = shlex.split(default_options)
            merged_options = vars(get_opts().parse_args(default_args))
        else:
            # Work on a copy: the shared defaults must stay untouched.
            merged_options = deepcopy(default_options)
        merged_options.update(options)
        options = merged_options

    result = []
    # (infotype, hasher) pairs in request order, at most one per infotype.
    hashers = []

    # Force unicode as soon as possible
    filename = u(filename)

    if isinstance(info, base_text_type):
        info = [info]

    for infotype in info:
        if infotype == 'filename':
            result.append(_guess_filename(filename, options, **kwargs))

        elif infotype == 'hash_mpc':
            from guessit.hash_mpc import hash_file
            try:
                result.append(Guess({infotype: hash_file(filename)},
                                    confidence=1.0))
            except Exception as e:
                log.warning('Could not compute MPC-style hash because: %s', e)

        elif infotype == 'hash_ed2k':
            from guessit.hash_ed2k import hash_file
            try:
                result.append(Guess({infotype: hash_file(filename)},
                                    confidence=1.0))
            except Exception as e:
                log.warning('Could not compute ed2k hash because: %s', e)

        elif infotype.startswith('hash_'):
            if any(name == infotype for name, _ in hashers):
                # Same hash asked for again: an extra hasher would never
                # receive data and would yield the digest of empty input.
                continue

            import hashlib
            hashname = infotype[5:]
            try:
                hasher = getattr(hashlib, hashname)()
            except (AttributeError, TypeError):
                # AttributeError: unknown name in hashlib.
                # TypeError: the attribute exists but cannot be called
                # without arguments to build a hasher (e.g. 'new').
                log.warning('Could not compute %s hash because it is not available from python\'s hashlib module', hashname)
            else:
                hashers.append((infotype, hasher))

        elif infotype == 'video':
            g = guess_video_metadata(filename)
            if g:
                result.append(g)

        else:
            log.warning('Invalid infotype: %s', infotype)

    # do all the hashes now, but on a single pass
    if hashers:
        try:
            blocksize = 8192

            with open(filename, 'rb') as f:
                chunk = f.read(blocksize)
                while chunk:
                    for _, hasher in hashers:
                        hasher.update(chunk)
                    chunk = f.read(blocksize)

            for infotype, hasher in hashers:
                result.append(Guess({infotype: hasher.hexdigest()},
                                    confidence=1.0))
        except Exception as e:
            log.warning('Could not compute hash because: %s', e)

    result = smart_merge(result)

    return result
Example #4
0
def main(args=None, setup_logging=True):
    """Run the GuessIt command-line tool.

    :param args: list of command-line arguments to parse. When it is empty
        or ``None`` the option parser is invoked without explicit arguments.
    :param setup_logging: when true, ``guessit.slogging.setup_logging()`` is
        called first.

    The parsed options select what happens: displaying properties/values or
    transformers, running the demo, printing the version banner, and calling
    ``submit_bug`` or ``guess_file`` for each non-empty filename coming from
    the filename option and/or from the input file (one name per line).
    The help text is printed when no such action was performed.
    """
    if setup_logging:
        from guessit import slogging
        slogging.setup_logging()

    if PY2:  # pragma: no cover
        import codecs
        import locale
        import sys

        # see http://bugs.python.org/issue2128
        if os.name == 'nt':
            for i, a in enumerate(sys.argv):
                sys.argv[i] = a.decode(locale.getpreferredencoding())

        # see https://github.com/wackou/guessit/issues/43
        # and http://stackoverflow.com/questions/4545661/unicodedecodeerror-when-redirecting-to-file
        # Wrap sys.stdout into a StreamWriter to allow writing unicode.
        sys.stdout = codecs.getwriter(locale.getpreferredencoding())(sys.stdout)

    # Needed for guessit.plugins.transformers.reload() to be called.
    # Imported for its side effect only; the name itself is unused.
    from guessit.plugins import transformers

    if args:
        options = get_opts().parse_args(args)
    else:  # pragma: no cover
        options = get_opts().parse_args()
    if options.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    help_required = True
    if options.properties or options.values:
        display_properties(options)
        help_required = False
    elif options.transformers:
        display_transformers()
        help_required = False

    if options.demo:
        run_demo(episodes=True, movies=True, options=vars(options))
        help_required = False

    if options.version:
        print('+-------------------------------------------------------+')
        print('+                   GuessIt ' + __version__ + (28-len(__version__)) * ' ' + '+')
        print('+-------------------------------------------------------+')
        print('|      Please report any bug or feature request at      |')
        print('|       https://github.com/wackou/guessit/issues.       |')
        print('+-------------------------------------------------------+')
        help_required = False

    if options.yaml:
        try:
            import yaml
            import babelfish

            def default_representer(dumper, data):
                # Represent babelfish objects by their plain string form.
                return dumper.represent_str(str(data))

            yaml.SafeDumper.add_representer(babelfish.Language, default_representer)
            yaml.SafeDumper.add_representer(babelfish.Country, default_representer)
        except ImportError:  # pragma: no cover
            print('PyYAML not found. Using default output.')

    filenames = []
    if options.filename:
        filenames.extend(options.filename)
    if options.input_file:
        # ``with`` guarantees the file is closed, even on a read error.
        with open(options.input_file, 'r') as input_file:
            filenames.extend(line.strip() for line in input_file)

    # Remove empty names (such as blank lines from the input file). Build a
    # list rather than calling ``filter()``: on Python 3 the latter returns
    # an iterator which is truthy even when it yields nothing.
    filenames = [f for f in filenames if f]

    if filenames:
        help_required = False
        if options.submit_bug:
            for filename in filenames:
                submit_bug(filename, options)
        else:
            for filename in filenames:
                guess_file(filename,
                           info=options.info.split(','),
                           options=vars(options))

    if help_required:  # pragma: no cover
        get_opts().print_help()
Example #5
0
    def checkFields(self, groundTruth, guess_func, remove_type=True,
                    exclude_files=None):
        """Compare the guesses of ``guess_func`` with the expected fields.

        :param groundTruth: mapping of filename -> dict of expected
            properties. An optional 'options' entry (a command-line style
            string) is popped from each dict, parsed with ``get_opts()`` and
            handed to ``guess_func``.
        :param guess_func: callable invoked as ``guess_func(filename,
            options)`` and returning a dict-like object of guessed properties.
        :param remove_type: when true, the 'type' property is removed from
            the guess before the comparison.
        :param exclude_files: optional collection of filenames to skip.

        Missing properties, wrong values and exceptions raised by
        ``guess_func`` are logged as errors and make the final assertion
        fail. Properties that were guessed but not expected are only logged
        as warnings.
        """
        total = 0
        exclude_files = exclude_files or []

        fails = defaultdict(list)
        additionals = defaultdict(list)

        for filename, required_fields in groundTruth.items():
            filename = u(filename)
            if filename in exclude_files:
                continue

            log.debug('\n' + '-' * 120)
            log.info('Guessing information for file: %s', filename)

            options = required_fields.pop('options', None)
            if options:
                options = vars(get_opts().parse_args(shlex.split(options)))

            # Count the file before guessing: a file whose guess raises is
            # recorded in ``fails`` and must therefore be part of ``total``,
            # otherwise ``total - len(fails)`` under-reports the successes.
            total += 1

            try:
                found = guess_func(filename, options)
            except Exception as e:
                message = "An exception has occured in %s: %s" % (filename, e)
                fails[filename].append(message)
                log.exception(message)
                continue

            # no need for these in the unittests
            if remove_type:
                try:
                    del found['type']
                except KeyError:
                    pass
            for prop in ('container', 'mimetype', 'unidentified'):
                if prop in found:
                    del found[prop]

            # props which are list of just 1 elem should be opened for easier writing of the tests
            for prop in ('language', 'subtitleLanguage', 'other', 'episodeDetails', 'unidentified'):
                value = found.get(prop, None)
                if isinstance(value, list) and len(value) == 1:
                    found[prop] = value[0]

            # look for missing properties
            for prop, value in required_fields.items():
                if prop not in found:
                    log.debug("Prop '%s' not found in: %s", prop, filename)
                    fails[filename].append("'%s' not found in: %s" % (prop, filename))
                    continue

                actual = found[prop]

                # ``kind`` labels the comparison in the debug output; it stays
                # None for the plain equality fallback, whose report also
                # shows the types of both values.
                kind = None

                # if both properties are strings, do a case-insensitive comparison
                if (isinstance(value, base_text_type) and
                        isinstance(actual, base_text_type)):
                    kind = 'str'
                    mismatch = value.lower() != actual.lower()

                elif isinstance(value, list) and isinstance(actual, list):
                    kind = 'list'
                    if actual and isinstance(actual[0], babelfish.Language):
                        # list of languages
                        s1 = set(babelfish.Language.fromguessit(s) for s in value)
                        s2 = set(actual)
                    else:
                        # by default we assume list of strings and do a case-insensitive
                        # comparison on their elements
                        s1 = set(u(s).lower() for s in value)
                        s2 = set(u(s).lower() for s in actual)
                    mismatch = s1 != s2

                elif isinstance(actual, babelfish.Language):
                    kind = 'Language'
                    try:
                        mismatch = babelfish.Language.fromguessit(value) != actual
                    except Exception:
                        # an expected value that cannot be converted is a failure
                        mismatch = True

                elif isinstance(actual, babelfish.Country):
                    kind = 'Country'
                    try:
                        mismatch = babelfish.Country.fromguessit(value) != actual
                    except Exception:
                        # an expected value that cannot be converted is a failure
                        mismatch = True

                # otherwise, just compare their values directly
                else:
                    mismatch = actual != value

                if not mismatch:
                    continue

                if kind is None:
                    detail = ("'%s': expected = '%s' [%s] - received = '%s' [%s]"
                              % (prop, u(value), type(value), u(actual), type(actual)))
                    log.debug("Wrong prop value for %s", detail)
                else:
                    detail = ("'%s': expected = '%s' - received = '%s'"
                              % (prop, u(value), u(actual)))
                    log.debug("Wrong prop value [%s] for %s", kind, detail)
                fails[filename].append(detail)

            # look for additional properties
            for prop, value in found.items():
                if prop not in required_fields:
                    log.debug("Found additional info for prop = '%s': '%s'", prop, u(value))
                    additionals[filename].append("'%s': '%s'" % (prop, u(value)))

        correct = total - len(fails)
        log.info('SUMMARY: Guessed correctly %d out of %d filenames', correct, total)

        for failed_entry, failed_properties in fails.items():
            log.error('---- ' + failed_entry + ' ----')
            for failed_property in failed_properties:
                log.error("FAILED: " + failed_property)

        for additional_entry, additional_properties in additionals.items():
            log.warning('---- ' + additional_entry + ' ----')
            for additional_property in additional_properties:
                log.warning("ADDITIONAL: " + additional_property)

        self.assertTrue(correct == total,
                        msg='Correct: %d < Total: %d' % (correct, total))
Example #6
0
    def checkFields(self,
                    groundTruth,
                    guess_func,
                    remove_type=True,
                    exclude_files=None):
        """Check what ``guess_func`` guesses against the expected fields.

        :param groundTruth: mapping of filename -> dict of expected
            properties. An optional 'options' entry (a command-line style
            string) is popped from each dict, parsed with ``get_opts()`` and
            passed to ``guess_func``.
        :param guess_func: callable invoked as ``guess_func(filename,
            options)`` and returning a dict-like object of guessed properties.
        :param remove_type: when true, the 'type' property is dropped from
            the guess before comparing.
        :param exclude_files: optional collection of filenames to skip.

        A missing property, a wrong value or an exception raised by
        ``guess_func`` is logged as an error and makes the final assertion
        fail. Guessed properties that were not expected are only logged as
        warnings.
        """
        total = 0
        exclude_files = exclude_files or []

        fails = defaultdict(list)
        additionals = defaultdict(list)

        for filename, required_fields in groundTruth.items():
            filename = u(filename)
            if filename in exclude_files:
                continue

            log.debug('\n' + '-' * 120)
            log.info('Guessing information for file: %s', filename)

            options = required_fields.pop('options', None)
            if options:
                options = vars(get_opts().parse_args(shlex.split(options)))

            # Count the file up front: when the guess raises, the file lands
            # in ``fails`` and has to be included in ``total`` too, or
            # ``total - len(fails)`` would under-report the successes.
            total += 1

            try:
                found = guess_func(filename, options)
            except Exception as e:
                message = "An exception has occured in %s: %s" % (filename, e)
                fails[filename].append(message)
                log.exception(message)
                continue

            # no need for these in the unittests
            if remove_type:
                try:
                    del found['type']
                except KeyError:
                    pass
            for prop in ('container', 'mimetype', 'unidentified'):
                if prop in found:
                    del found[prop]

            # props which are list of just 1 elem should be opened for easier writing of the tests
            for prop in ('language', 'subtitleLanguage', 'other',
                         'episodeDetails', 'unidentified'):
                value = found.get(prop, None)
                if isinstance(value, list) and len(value) == 1:
                    found[prop] = value[0]

            # look for missing properties
            for prop, value in required_fields.items():
                if prop not in found:
                    log.debug("Prop '%s' not found in: %s", prop, filename)
                    fails[filename].append("'%s' not found in: %s" %
                                           (prop, filename))
                    continue

                actual = found[prop]

                # ``kind`` names the comparison in the debug output; None
                # means the plain equality fallback, whose report also
                # includes the types of both values.
                kind = None

                # if both properties are strings, do a case-insensitive comparison
                if (isinstance(value, base_text_type)
                        and isinstance(actual, base_text_type)):
                    kind = 'str'
                    mismatch = value.lower() != actual.lower()

                elif isinstance(value, list) and isinstance(actual, list):
                    kind = 'list'
                    if actual and isinstance(actual[0], babelfish.Language):
                        # list of languages
                        s1 = set(
                            babelfish.Language.fromguessit(s) for s in value)
                        s2 = set(actual)
                    else:
                        # by default we assume list of strings and do a case-insensitive
                        # comparison on their elements
                        s1 = set(u(s).lower() for s in value)
                        s2 = set(u(s).lower() for s in actual)
                    mismatch = s1 != s2

                elif isinstance(actual, babelfish.Language):
                    kind = 'Language'
                    try:
                        mismatch = (
                            babelfish.Language.fromguessit(value) != actual)
                    except Exception:
                        # an unconvertible expected value counts as a failure
                        mismatch = True

                elif isinstance(actual, babelfish.Country):
                    kind = 'Country'
                    try:
                        mismatch = (
                            babelfish.Country.fromguessit(value) != actual)
                    except Exception:
                        # an unconvertible expected value counts as a failure
                        mismatch = True

                # otherwise, just compare their values directly
                else:
                    mismatch = actual != value

                if not mismatch:
                    continue

                if kind is None:
                    detail = (
                        "'%s': expected = '%s' [%s] - received = '%s' [%s]" %
                        (prop, u(value), type(value), u(actual),
                         type(actual)))
                    log.debug("Wrong prop value for %s", detail)
                else:
                    detail = ("'%s': expected = '%s' - received = '%s'" %
                              (prop, u(value), u(actual)))
                    log.debug("Wrong prop value [%s] for %s", kind, detail)
                fails[filename].append(detail)

            # look for additional properties
            for prop, value in found.items():
                if prop not in required_fields:
                    log.debug("Found additional info for prop = '%s': '%s'",
                              prop, u(value))
                    additionals[filename].append("'%s': '%s'" %
                                                 (prop, u(value)))

        correct = total - len(fails)
        log.info('SUMMARY: Guessed correctly %d out of %d filenames',
                 correct, total)

        for failed_entry, failed_properties in fails.items():
            log.error('---- ' + failed_entry + ' ----')
            for failed_property in failed_properties:
                log.error("FAILED: " + failed_property)

        for additional_entry, additional_properties in additionals.items():
            log.warning('---- ' + additional_entry + ' ----')
            for additional_property in additional_properties:
                log.warning("ADDITIONAL: " + additional_property)

        self.assertTrue(correct == total,
                        msg='Correct: %d < Total: %d' % (correct, total))