def guess_release_group(string):
    group_names = [ r'\.(Xvid)-(?P<releaseGroup>.*?)[ \.]',
                    r'\.(DivX)-(?P<releaseGroup>.*?)[\. ]',
                    r'\.(DVDivX)-(?P<releaseGroup>.*?)[\. ]',
                    ]

    # first try to see whether we have both a known codec and a known release group
    group_names = [ r'\.(?P<videoCodec>' + codec + r')-(?P<releaseGroup>.*?)[ \.]'
                    for codec in (CODECS + FORMATS) ]

    for rexp in group_names:
        match = re.search(rexp, string, re.IGNORECASE)
        if match:
            metadata = match.groupdict()
            if canonical_form(metadata['releaseGroup']) in properties['releaseGroup']:
                return adjust_metadata(metadata), (match.start(1), match.end(2))

    # pick anything as releaseGroup as long as we have a codec in front
    # this doesn't include a potential dash ('-') ending the release group
    # eg: [...].X264-HiS@SiLUHD-English.[...]
    group_names = [ r'\.(?P<videoCodec>' + codec + r')-(?P<releaseGroup>.*?)(-(.*?))?[ \.]'
                    for codec in (CODECS + FORMATS) ]

    for rexp in group_names:
        match = re.search(rexp, string, re.IGNORECASE)
        if match:
            return adjust_metadata(match.groupdict()), (match.start(1), match.end(2))

    return None, None
def format_guess(guess):
    """Format all the found values to their natural type.
    For instance, a year would be stored as an int value, etc...

    Note that this modifies the dictionary given as input.
    """
    for prop, value in guess.items():
        if prop in ("season", "episodeNumber", "year", "cdNumber", "cdNumberTotal"):
            guess[prop] = int(guess[prop])
        elif isinstance(value, basestring):
            if prop in ("edition",):
                value = clean_string(value)
            guess[prop] = canonical_form(value)

    return guess
def guess_filetype(filename, filetype = 'autodetect'):
    other = {}

    # look at the extension first
    fileext = os.path.splitext(filename)[1][1:].lower()
    if fileext in subtitle_exts:
        if 'movie' in filetype:
            filetype = 'moviesubtitle'
        elif 'episode' in filetype:
            filetype = 'episodesubtitle'
        else:
            filetype = 'subtitle'
        other = { 'container': fileext }
    elif fileext in video_exts:
        if filetype == 'autodetect':
            filetype = 'video'
        other = { 'container': fileext }
    else:
        if filetype == 'autodetect':
            filetype = 'unknown'
        other = { 'extension': fileext }

    # now look whether there are some specific hints for episode vs movie
    if filetype in ('video', 'subtitle'):
        for rexp, confidence, span_adjust in episode_rexps:
            match = re.search(rexp, filename, re.IGNORECASE)
            if match:
                if filetype == 'video':
                    filetype = 'episode'
                elif filetype == 'subtitle':
                    filetype = 'episodesubtitle'
                break

        for prop, value, start, end in find_properties(filename):
            if canonical_form(value) == 'DVB':
                if filetype == 'video':
                    filetype = 'episode'
                elif filetype == 'subtitle':
                    filetype = 'episodesubtitle'
                break

        # if no episode info found, assume it's a movie
        if filetype == 'video':
            filetype = 'movie'
        elif filetype == 'subtitle':
            filetype = 'moviesubtitle'

    return filetype, other
Exemple #4
0
def format_guess(guess):
    """Format all the found values to their natural type.
    For instance, a year would be stored as an int value, etc...

    Note that this modifies the dictionary given as input.
    """
    for prop, value in list(guess.items()):
        if prop in ('season', 'episodeNumber', 'year', 'cdNumber',
                    'cdNumberTotal', 'bonusNumber', 'filmNumber'):
            guess[prop] = int(guess[prop])
        elif isinstance(value, base_text_type):
            if prop in ('edition', ):
                value = clean_string(value)
            guess[prop] = canonical_form(value).replace('\\', '')

    return guess
Exemple #5
0
def format_guess(guess):
    """Format all the found values to their natural type.
    For instance, a year would be stored as an int value, etc...

    Note that this modifies the dictionary given as input.
    """
    for prop, value in guess.items():
        if prop in ('season', 'episodeNumber', 'year', 'cdNumber',
                    'cdNumberTotal', 'bonusNumber', 'filmNumber'):
            guess[prop] = int(guess[prop])
        elif isinstance(value, base_text_type):
            if prop in ('edition',):
                value = clean_string(value)
            guess[prop] = canonical_form(value).replace('\\', '')

    return guess
Exemple #6
0
def guess_filetype(mtree, filetype):
    # put the filetype inside a dummy container to be able to have the
    # following functions work correctly as closures
    # this is a workaround for python 2 which doesn't have the
    # 'nonlocal' keyword (python 3 does have it)
    filetype_container = [filetype]
    other = {}
    filename = mtree.string

    def upgrade_episode():
        if filetype_container[0] == 'video':
            filetype_container[0] = 'episode'
        elif filetype_container[0] == 'subtitle':
            filetype_container[0] = 'episodesubtitle'

    def upgrade_movie():
        if filetype_container[0] == 'video':
            filetype_container[0] = 'movie'
        elif filetype_container[0] == 'subtitle':
            filetype_container[0] = 'moviesubtitle'

    def upgrade_subtitle():
        if 'movie' in filetype_container[0]:
            filetype_container[0] = 'moviesubtitle'
        elif 'episode' in filetype_container[0]:
            filetype_container[0] = 'episodesubtitle'
        else:
            filetype_container[0] = 'subtitle'

    def upgrade(type='unknown'):
        if filetype_container[0] == 'autodetect':
            filetype_container[0] = type

    # look at the extension first
    fileext = os.path.splitext(filename)[1][1:].lower()
    if fileext in subtitle_exts:
        upgrade_subtitle()
        other = {'container': fileext}
    elif fileext in video_exts:
        upgrade(type='video')
        other = {'container': fileext}
    else:
        upgrade(type='unknown')
        other = {'extension': fileext}

    # check whether we are in a 'Movies', 'Tv Shows', ... folder
    folder_rexps = [(r'Movies?', upgrade_movie),
                    (r'Tv ?Shows?', upgrade_episode),
                    (r'Series', upgrade_episode)]
    for frexp, upgrade_func in folder_rexps:
        frexp = re.compile(frexp, re.IGNORECASE)
        for pathgroup in mtree.children:
            if frexp.match(pathgroup.value):
                upgrade_func()

    # check for a few specific cases which will unintentionally make the
    # following heuristics confused (eg: OSS 117 will look like an episode,
    # season 1, epnum 17, when it is in fact a movie)
    fname = clean_string(filename).lower()
    for m in MOVIES:
        if m in fname:
            upgrade_movie()
    for s in SERIES:
        if s in fname:
            upgrade_episode()

    # now look whether there are some specific hints for episode vs movie
    if filetype_container[0] in ('video', 'subtitle'):
        # if we have an episode_rexp (eg: s02e13), it is an episode
        for rexp, _, _ in episode_rexps:
            match = re.search(rexp, filename, re.IGNORECASE)
            if match:
                upgrade_episode()
                break

        # if we have a 3-4 digit number that's not a year, maybe an episode
        match = re.search(r'[^0-9]([0-9]{3,4})[^0-9]', filename)
        if match:
            fullnumber = int(match.group()[1:-1])
            #season = fullnumber // 100
            epnumber = fullnumber % 100
            possible = True

            # check for validity
            if epnumber > 40:
                possible = False
            if valid_year(fullnumber):
                possible = False

            if possible:
                upgrade_episode()

        # if we have certain properties characteristic of episodes, it is an ep
        for prop, value, _, _ in find_properties(filename):
            log.debug('prop: %s = %s' % (prop, value))
            if prop == 'episodeFormat':
                upgrade_episode()
                break

            elif canonical_form(value) == 'DVB':
                upgrade_episode()
                break

        # origin-specific type
        if 'tvu.org.ru' in filename:
            upgrade_episode()

        # if no episode info found, assume it's a movie
        upgrade_movie()

    filetype = filetype_container[0]
    return filetype, other
def adjust_metadata(md):
    codec = canonical_form(md['videoCodec'])
    if codec in FORMATS:
        md['format'] = codec
        del md['videoCodec']
    return md
def guess_filetype(mtree, filetype):
    # put the filetype inside a dummy container to be able to have the
    # following functions work correctly as closures
    # this is a workaround for python 2 which doesn't have the
    # 'nonlocal' keyword (python 3 does have it)
    filetype_container = [filetype]
    other = {}
    filename = mtree.string

    def upgrade_episode():
        if filetype_container[0] == 'video':
            filetype_container[0] = 'episode'
        elif filetype_container[0] == 'subtitle':
            filetype_container[0] = 'episodesubtitle'

    def upgrade_movie():
        if filetype_container[0] == 'video':
            filetype_container[0] = 'movie'
        elif filetype_container[0] == 'subtitle':
            filetype_container[0] = 'moviesubtitle'

    def upgrade_subtitle():
        if 'movie' in filetype_container[0]:
            filetype_container[0] = 'moviesubtitle'
        elif 'episode' in filetype_container[0]:
            filetype_container[0] = 'episodesubtitle'
        else:
            filetype_container[0] = 'subtitle'

    def upgrade(type='unknown'):
        if filetype_container[0] == 'autodetect':
            filetype_container[0] = type


    # look at the extension first
    fileext = os.path.splitext(filename)[1][1:].lower()
    if fileext in subtitle_exts:
        upgrade_subtitle()
        other = { 'container': fileext }
    elif fileext in video_exts:
        upgrade(type='video')
        other = { 'container': fileext }
    else:
        upgrade(type='unknown')
        other = { 'extension': fileext }



    # check whether we are in a 'Movies', 'Tv Shows', ... folder
    folder_rexps = [ (r'Movies?', upgrade_movie),
                     (r'Tv ?Shows?', upgrade_episode),
                     (r'Series', upgrade_episode)
                     ]
    for frexp, upgrade_func in folder_rexps:
        frexp = re.compile(frexp, re.IGNORECASE)
        for pathgroup in mtree.children:
            if frexp.match(pathgroup.value):
                upgrade_func()

    # check for a few specific cases which will unintentionally make the
    # following heuristics confused (eg: OSS 117 will look like an episode,
    # season 1, epnum 17, when it is in fact a movie)
    fname = clean_string(filename).lower()
    for m in MOVIES:
        if m in fname:
            upgrade_movie()
    for s in SERIES:
        if s in fname:
            upgrade_episode()

    # now look whether there are some specific hints for episode vs movie
    if filetype_container[0] in ('video', 'subtitle'):
        # if we have an episode_rexp (eg: s02e13), it is an episode
        for rexp, _, _ in episode_rexps:
            match = re.search(rexp, filename, re.IGNORECASE)
            if match:
                upgrade_episode()
                break

        # if we have a 3-4 digit number that's not a year, maybe an episode
        match = re.search(r'[^0-9]([0-9]{3,4})[^0-9]', filename)
        if match:
            fullnumber = int(match.group()[1:-1])
            #season = fullnumber // 100
            epnumber = fullnumber % 100
            possible = True

            # check for validity
            if epnumber > 40:
                possible = False
            if valid_year(fullnumber):
                possible = False

            if possible:
                upgrade_episode()

        # if we have certain properties characteristic of episodes, it is an ep
        for prop, value, _, _ in find_properties(filename):
            log.debug('prop: %s = %s' % (prop, value))
            if prop == 'episodeFormat':
                upgrade_episode()
                break

            elif canonical_form(value) == 'DVB':
                upgrade_episode()
                break

        # origin-specific type
        if 'tvu.org.ru' in filename:
            upgrade_episode()

        # if no episode info found, assume it's a movie
        upgrade_movie()

    filetype = filetype_container[0]
    return filetype, other
def guess_filetype(filename, filetype):
    other = {}

    # look at the extension first
    fileext = os.path.splitext(filename)[1][1:].lower()
    if fileext in subtitle_exts:
        if 'movie' in filetype:
            filetype = 'moviesubtitle'
        elif 'episode' in filetype:
            filetype = 'episodesubtitle'
        else:
            filetype = 'subtitle'
        other = { 'container': fileext }
    elif fileext in video_exts:
        if filetype == 'autodetect':
            filetype = 'video'
        other = { 'container': fileext }
    else:
        if filetype == 'autodetect':
            filetype = 'unknown'
        other = { 'extension': fileext }

    # put the filetype inside a dummy container to be able to have the
    # following functions work correctly as closures
    # this is a workaround for python 2 which doesn't have the
    # 'nonlocal' keyword (python 3 does have it)
    filetype_container = [filetype]

    def upgrade_episode():
        if filetype_container[0] == 'video':
            filetype_container[0] = 'episode'
        elif filetype_container[0] == 'subtitle':
            filetype_container[0] = 'episodesubtitle'

    def upgrade_movie():
        if filetype_container[0] == 'video':
            filetype_container[0] = 'movie'
        elif filetype_container[0] == 'subtitle':
            filetype_container[0] = 'moviesubtitle'

    # now look whether there are some specific hints for episode vs movie
    if filetype in ('video', 'subtitle'):
        for rexp, _, _ in episode_rexps:
            match = re.search(rexp, filename, re.IGNORECASE)
            if match:
                upgrade_episode()
                break

        for prop, value, _, _ in find_properties(filename):
            log.debug('prop: %s = %s' % (prop, value))
            if prop == 'episodeFormat':
                upgrade_episode()
                break

            elif canonical_form(value) == 'DVB':
                upgrade_episode()
                break

        # if no episode info found, assume it's a movie
        upgrade_movie()

    filetype = filetype_container[0]
    return filetype, other
Exemple #10
0
def guess_filetype(filename, filetype):
    other = {}

    # look at the extension first
    fileext = os.path.splitext(filename)[1][1:].lower()
    if fileext in subtitle_exts:
        if 'movie' in filetype:
            filetype = 'moviesubtitle'
        elif 'episode' in filetype:
            filetype = 'episodesubtitle'
        else:
            filetype = 'subtitle'
        other = {'container': fileext}
    elif fileext in video_exts:
        if filetype == 'autodetect':
            filetype = 'video'
        other = {'container': fileext}
    else:
        if filetype == 'autodetect':
            filetype = 'unknown'
        other = {'extension': fileext}

    # put the filetype inside a dummy container to be able to have the
    # following functions work correctly as closures
    # this is a workaround for python 2 which doesn't have the
    # 'nonlocal' keyword (python 3 does have it)
    filetype_container = [filetype]

    def upgrade_episode():
        if filetype_container[0] == 'video':
            filetype_container[0] = 'episode'
        elif filetype_container[0] == 'subtitle':
            filetype_container[0] = 'episodesubtitle'

    def upgrade_movie():
        if filetype_container[0] == 'video':
            filetype_container[0] = 'movie'
        elif filetype_container[0] == 'subtitle':
            filetype_container[0] = 'moviesubtitle'

    # now look whether there are some specific hints for episode vs movie
    if filetype in ('video', 'subtitle'):
        for rexp, _, _ in episode_rexps:
            match = re.search(rexp, filename, re.IGNORECASE)
            if match:
                upgrade_episode()
                break

        for prop, value, _, _ in find_properties(filename):
            log.debug('prop: %s = %s' % (prop, value))
            if prop == 'episodeFormat':
                upgrade_episode()
                break

            elif canonical_form(value) == 'DVB':
                upgrade_episode()
                break

        if 'tvu.org.ru' in filename:
            upgrade_episode()

        # if no episode info found, assume it's a movie
        upgrade_movie()

    filetype = filetype_container[0]
    return filetype, other