예제 #1
0
def guess_properties(string):
    """Build a guess from the first property found in ``string``.

    Returns a ``(guess, span)`` pair where ``guess`` is a ``Guess`` created
    with the property's confidence and holding its canonical form under the
    property's name, and ``span`` is the second element of the first entry
    returned by ``container.find_properties``. Returns ``(None, None)`` when
    nothing is found.
    """
    matches = container.find_properties(string)
    if not matches:
        return None, None

    # only the first entry is used; any further entries are ignored
    first_prop, first_span = matches[0]
    result = Guess(confidence=first_prop.confidence)
    result[first_prop.name] = first_prop.canonical_form
    return result, first_span
예제 #2
0
def guess_filetype(mtree, filetype):
    """Refine ``filetype`` using hints found in the filename of ``mtree``.

    The hints are examined in this order: the file extension, the path
    components (looking for 'Movies', 'Tv Shows' or 'Series' folders), the
    MOVIES / SERIES exception lists, and finally a set of episode-specific
    heuristics. If the type is still one of the generic 'video', 'subtitle'
    or 'info' after all of that, it is assumed to be a movie.

    :param mtree: object providing ``string`` (the filename that is
        analysed) and ``children`` (an iterable of objects whose ``value``
        is matched against the folder patterns).
    :param filetype: the initial type; 'autodetect' lets the extension
        decide between 'video' and 'unknown'.
    :return: a ``(filetype, other)`` tuple, where ``other`` is a dict holding
        the lowercased extension under 'container' (known subtitle, info or
        video extension) or under 'extension' (anything else).
    """
    # keep the filetype inside a one-element list so that the nested
    # functions below can rebind it: this is a workaround for python 2
    # which doesn't have the 'nonlocal' keyword (python 3 does have it)
    filetype_container = [filetype]
    filename = mtree.string

    # types that have not been specialised into movie / episode yet
    generic_types = ('video', 'subtitle', 'info')
    episode_types = {'video': 'episode',
                     'subtitle': 'episodesubtitle',
                     'info': 'episodeinfo'}
    movie_types = {'video': 'movie',
                   'subtitle': 'moviesubtitle',
                   'info': 'movieinfo'}

    def upgrade_episode():
        # only generic types are specialised, anything else is left as is
        current = filetype_container[0]
        filetype_container[0] = episode_types.get(current, current)

    def upgrade_movie():
        # only generic types are specialised, anything else is left as is
        current = filetype_container[0]
        filetype_container[0] = movie_types.get(current, current)

    def upgrade_kind(kind):
        # switch to the 'subtitle' / 'info' flavour of the current type,
        # keeping the movie / episode distinction if there is already one
        current = filetype_container[0]
        if 'movie' in current:
            filetype_container[0] = 'movie' + kind
        elif 'episode' in current:
            filetype_container[0] = 'episode' + kind
        else:
            filetype_container[0] = kind

    def upgrade(new_type='unknown'):
        # an explicitly requested filetype is never overridden here
        if filetype_container[0] == 'autodetect':
            filetype_container[0] = new_type

    # look at the extension first
    fileext = os.path.splitext(filename)[1][1:].lower()
    if fileext in subtitle_exts:
        upgrade_kind('subtitle')
        other = {'container': fileext}
    elif fileext in info_exts:
        upgrade_kind('info')
        other = {'container': fileext}
    elif fileext in video_exts:
        upgrade('video')
        other = {'container': fileext}
    else:
        upgrade('unknown')
        other = {'extension': fileext}

    # check whether we are in a 'Movies', 'Tv Shows', ... folder
    folder_rexps = [
        (r'Movies?', upgrade_movie),
        (r'Tv[ _-]?Shows?', upgrade_episode),
        (r'Series', upgrade_episode),
    ]
    for pattern, upgrade_func in folder_rexps:
        frexp = re.compile(pattern, re.IGNORECASE)
        for pathgroup in mtree.children:
            if frexp.match(pathgroup.value):
                upgrade_func()

    # check for a few specific cases which will unintentionally make the
    # following heuristics confused (eg: OSS 117 will look like an episode,
    # season 1, epnum 17, when it is in fact a movie)
    fname = clean_string(filename).lower()
    for m in MOVIES:
        if m in fname:
            log.debug('Found in exception list of movies -> type = movie')
            upgrade_movie()
    for s in SERIES:
        if s in fname:
            log.debug('Found in exception list of series -> type = episode')
            upgrade_episode()

    # now look whether there are some specific hints for episode vs movie
    if filetype_container[0] in generic_types:
        # if we have an episode_rexp (eg: s02e13), it is an episode
        for rexp, _, _ in episode_rexps:
            match = re.search(rexp, filename, re.IGNORECASE)
            if match:
                log.debug('Found matching regexp: "%s" (string = "%s") -> type = episode', rexp, match.group())
                upgrade_episode()
                break

        # if we have a 3-4 digit number that's not a year, maybe an episode
        # (only the first such number in the filename is considered)
        match = re.search(r'[^0-9]([0-9]{3,4})[^0-9]', filename)
        if match:
            fullnumber = int(match.group(1))
            # the last two digits are the episode number, the leading
            # one(s) would be the season
            epnumber = fullnumber % 100
            is_year = valid_year(fullnumber)

            # more than 40 episodes in a season is considered implausible
            if epnumber <= 40 and not is_year:
                log.debug('Found possible episode number: %s (from string "%s") -> type = episode', epnumber, match.group())
                upgrade_episode()

        # if we have certain properties characteristic of episodes, it is an ep
        for prop, _ in container.find_properties(filename, 'episodeFormat'):
            log.debug('Found characteristic property of episodes: %s', prop)
            upgrade_episode()

        for prop, _ in container.find_properties(filename, 'format'):
            if container.compute_canonical_form('format', prop.canonical_form) == 'DVB':
                log.debug('Found characteristic property of episodes: %s', prop)
                upgrade_episode()
                break

        # origin-specific type
        if 'tvu.org.ru' in filename:
            log.debug('Found characteristic property of episodes: %s', 'tvu.org.ru')
            upgrade_episode()

        # if no episode info found, assume it's a movie; the type is still
        # generic only when none of the heuristics above upgraded it, so the
        # message is not logged for files already detected as episodes
        if filetype_container[0] in generic_types:
            log.debug('Nothing characteristic found, assuming type = movie')
            upgrade_movie()

    return filetype_container[0], other
예제 #3
0
def guess_properties(string):
    """Look up the properties present in ``string`` and convert them.

    The result of ``container.find_properties(string)`` is handed, together
    with the original string, to ``container.as_guess``, whose return value
    is returned unchanged.
    """
    return container.as_guess(container.find_properties(string), string)