Exemple #1
0
def set_season_number(number, token):
    """Record ``number`` as an anime season and flag ``token`` as identified.

    Returns ``False`` without recording anything when ``number`` does not
    consist solely of digits. Otherwise the number is inserted under
    ``ElementCategory.ANIME_SEASON``, the token's category is set to
    ``TokenCategory.IDENTIFIER`` and ``True`` is returned.
    """
    if number.isdigit():
        Elements.insert(ElementCategory.ANIME_SEASON, number)
        token.category = TokenCategory.IDENTIFIER
        return True

    return False
Exemple #2
0
def build_element(category, token_begin=None, token_end=None,
                  keep_delimiters=False):
    """Assemble an element from a run of tokens and store it in ``category``.

    Tokens are taken from ``Tokens.get_list(begin=token_begin,
    end=token_end)``. Unknown tokens contribute their content and are
    re-categorised as identifiers; bracket tokens contribute their content
    unchanged. Delimiters are copied verbatim when ``keep_delimiters`` is
    true. Otherwise a delimiter that is neither ``token_begin`` nor
    ``token_end`` is kept if it is ``,`` or ``&`` and replaced by a space
    if it is anything else.

    Unless ``keep_delimiters`` is set, spaces and ``DASHES`` characters are
    stripped from both ends of the result. Nothing is inserted when the
    resulting string is empty.
    """
    pieces = []

    for token in Tokens.get_list(begin=token_begin, end=token_end):
        kind = token.category
        if kind == TokenCategory.UNKNOWN:
            pieces.append(token.content)
            token.category = TokenCategory.IDENTIFIER
        elif kind == TokenCategory.BRACKET:
            pieces.append(token.content)
        elif kind == TokenCategory.DELIMITER:
            delimiter = token.content
            if keep_delimiters:
                pieces.append(delimiter)
            elif token != token_begin and token != token_end:
                # Only "," and "&" survive; other inner delimiters become
                # a single space.
                pieces.append(delimiter if delimiter in (',', '&') else ' ')

    element = ''.join(pieces)
    if not keep_delimiters:
        element = element.strip(' ' + DASHES)

    if element:
        Elements.insert(category, element)
Exemple #3
0
def set_volume_number(number, token, validate):
    """Record ``number`` as a volume number and flag ``token`` as identified.

    When ``validate`` is true the number must pass
    ``is_valid_volume_number``; if it does not, nothing is recorded and
    ``False`` is returned. Otherwise the number is inserted under
    ``ElementCategory.VOLUME_NUMBER``, the token's category becomes
    ``TokenCategory.IDENTIFIER`` and ``True`` is returned.
    """
    if validate and not is_valid_volume_number(number):
        return False

    Elements.insert(ElementCategory.VOLUME_NUMBER, number)
    token.category = TokenCategory.IDENTIFIER
    return True
Exemple #4
0
def match_single_episode_pattern(word, token):
    """Match an episode number with a version suffix, e.g. ``01v2``.

    ``word`` must be one to three digits, a ``v`` or ``V``, and a single
    digit, with nothing after it. On a match the episode number is passed
    to ``set_episode_number`` (unvalidated, result ignored), the version
    digit is inserted under ``ElementCategory.RELEASE_VERSION`` and ``True``
    is returned. Returns ``False`` when the word does not match.
    """
    found = re.match('(\\d{1,3})[vV](\\d)$', word)
    if found is None:
        return False

    episode, version = found.groups()
    set_episode_number(episode, token, validate=False)
    Elements.insert(ElementCategory.RELEASE_VERSION, version)
    return True
Exemple #5
0
    def search_for_keywords(self):
        """Classify unknown tokens that are keywords, checksums or resolutions.

        Each unknown token's content is trimmed of spaces and hyphens and
        looked up (after normalisation) in ``keyword_manager``.

        * When a keyword is found, its category is used unless it is
          filtered out: release groups when the ``parse_release_group``
          option is off, categories or keywords that are not searchable,
          and singular categories that are already present. Season, episode
          and volume prefixes are delegated to their helper checks and
          never inserted directly. A release version drops its first
          character before insertion.
        * When no keyword is found, the word is tested as a CRC32 checksum
          and then as a video resolution, each only if that element has not
          been recorded yet.

        A word that ends up with a known category is inserted into
        ``Elements``. Its token is marked as an identifier when there is no
        keyword or when the keyword's options say it is identifiable.
        """
        for token in Tokens.get_list(TokenFlags.UNKNOWN):
            word = token.content.strip(' -')

            if not word:
                continue
            # Don't bother if the word is a number that cannot be CRC
            if word.isdigit() and len(word) != 8:
                continue

            keyword = keyword_manager.find(keyword_manager.normalize(word))
            if not keyword:
                category = ElementCategory.UNKNOWN
                if not Elements.contains(ElementCategory.FILE_CHECKSUM) and \
                        parser_helper.is_crc32(word):
                    category = ElementCategory.FILE_CHECKSUM
                elif not Elements.contains(ElementCategory.VIDEO_RESOLUTION) \
                        and parser_helper.is_resolution(word):
                    category = ElementCategory.VIDEO_RESOLUTION
            else:
                category = keyword.category

                # Filters: skip keywords we are not allowed to record
                if not self.options['parse_release_group'] and \
                        category == ElementCategory.RELEASE_GROUP:
                    continue
                if not (ElementCategory.is_searchable(category) and
                        keyword.options.searchable):
                    continue
                if ElementCategory.is_singular(category) and \
                        Elements.contains(category):
                    continue

                # Prefix keywords are handled by dedicated helpers
                if category == ElementCategory.ANIME_SEASON_PREFIX:
                    parser_helper.check_anime_season_keyword(token)
                    continue
                elif category == ElementCategory.EPISODE_PREFIX:
                    if keyword.options.valid:
                        parser_number.check_extent_keyword(
                            ElementCategory.EPISODE_NUMBER, token)
                    continue
                elif category == ElementCategory.RELEASE_VERSION:
                    word = word[1:]  # number without "v"
                elif category == ElementCategory.VOLUME_PREFIX:
                    parser_number.check_extent_keyword(
                        ElementCategory.VOLUME_NUMBER, token)
                    continue

            if category == ElementCategory.UNKNOWN:
                continue

            Elements.insert(category, word)
            if keyword is None or keyword.options.identifiable:
                token.category = TokenCategory.IDENTIFIER
Exemple #6
0
def match_season_and_episode_pattern(word, token):
    """Match combined season/episode notations such as ``S01E02`` or ``2x05``.

    Accepted shape (case-insensitive, anchored at both ends of ``word``):
    an optional ``S``, a one- or two-digit season, an optional second
    season after ``-`` (``S01-S02``), then either ``x`` or an ``E``
    optionally preceded by one separator (space, ``.``, ``_``, ``-`` or
    ``x``), a one- to three-digit episode, and an optional second episode
    after ``-`` (``E01-E02`` or ``E01-02``).

    On a match every captured season is inserted under
    ``ElementCategory.ANIME_SEASON`` and every captured episode is passed
    to ``set_episode_number`` without validation; ``True`` is returned.
    Returns ``False`` when ``word`` does not match.
    """
    # The separator class lists "-" last so that it is a literal hyphen.
    # Written as "[ ._-x]" it would form the range "_" to "x", which accepts
    # a backtick and the letters a-x while rejecting "-" itself.
    pattern = (r'S?(\d{1,2})(?:-S?(\d{1,2}))?'
               r'(?:x|[ ._x-]?E)(\d{1,3})(?:-E?(\d{1,3}))?$')
    match = re.match(pattern, word, flags=re.IGNORECASE)

    if match:
        Elements.insert(ElementCategory.ANIME_SEASON, match.group(1))
        if match.group(2):
            Elements.insert(ElementCategory.ANIME_SEASON, match.group(2))
        set_episode_number(match.group(3), token, validate=False)
        if match.group(4):
            set_episode_number(match.group(4), token, validate=False)
        return True

    return False
Exemple #7
0
def match_multi_volume_pattern(word, token):
    """Match a volume range such as ``01-02`` or ``1~3v2``.

    ``word`` must be two one- or two-digit numbers joined by ``-``, ``~``,
    ``&`` or ``+``, optionally followed by ``v``/``V`` and a version digit.
    The range is accepted only when the first number is strictly smaller
    than the second and the first passes ``set_volume_number`` with
    validation. The second number is then recorded without validation, and
    the version (if any) is inserted under
    ``ElementCategory.RELEASE_VERSION``.

    Returns ``True`` when the range was recorded, ``False`` otherwise.
    """
    found = re.match('(\\d{1,2})[-~&+](\\d{1,2})(?:[vV](\\d))?$', word)
    if not found:
        return False

    lower_bound, upper_bound, version = found.groups()
    if int(lower_bound) >= int(upper_bound):
        return False
    if not set_volume_number(lower_bound, token, validate=True):
        return False

    set_volume_number(upper_bound, token, validate=False)
    if version:
        Elements.insert(ElementCategory.RELEASE_VERSION, version)
    return True
Exemple #8
0
def match_number_sign_pattern(word, token):
    """Match episode numbers introduced by a number sign, e.g. ``#01``.

    Accepted shape: ``#``, one to three digits, an optional second number
    after ``-``, ``~``, ``&`` or ``+``, and an optional ``v``/``V`` plus a
    version digit, with nothing after it.

    The first number must pass ``set_episode_number`` with validation. When
    it does, the second number (if any) is also submitted with validation,
    the version (if any) is inserted under
    ``ElementCategory.RELEASE_VERSION`` and ``True`` is returned.

    Returns ``False`` when ``word`` is empty, does not start with ``#``,
    does not match the pattern, or its first number is rejected.
    """
    # startswith() rather than word[0] so an empty word is rejected instead
    # of raising IndexError.
    if not word.startswith('#'):
        return False

    pattern = '#(\\d{1,3})(?:[-~&+](\\d{1,3}))?(?:[vV](\\d))?$'
    match = re.match(pattern, word)

    if match:
        if set_episode_number(match.group(1), token, validate=True):
            if match.group(2):
                set_episode_number(match.group(2), token, validate=True)
            if match.group(3):
                Elements.insert(ElementCategory.RELEASE_VERSION,
                                match.group(3))
            return True

    return False
Exemple #9
0
def parse(filename, options=default_options):
    """Parse ``filename`` and return the elements found in it.

    ``options`` is overlaid on ``default_options``; keys the caller leaves
    out fall back to their defaults. The caller's mapping is not modified.
    ``None`` is accepted and means "use the defaults".

    The steps are: reset ``Elements`` and ``Tokens``, record the original
    file name, optionally split off the file extension
    (``parse_file_extension``), remove any ``ignored_strings``, then run
    the tokenizer followed by the parser.

    Returns ``Elements.get_dictionary()`` on success. Returns ``None`` when
    the file name is empty after preprocessing, or when the tokenizer or
    the parser reports failure.
    """
    Elements.clear()
    Tokens.clear()

    # Add missing options by layering the caller's settings over a copy of
    # the defaults. Filling them in with setdefault() on the argument would
    # write into the caller's dictionary.
    merged_options = dict(default_options)
    if options is not None:
        merged_options.update(options)
    options = merged_options

    Elements.insert(ElementCategory.FILE_NAME, filename)
    if options['parse_file_extension']:
        filename, extension = remove_extension_from_filename(filename)
        if extension:
            Elements.insert(ElementCategory.FILE_EXTENSION, extension)

    if options['ignored_strings']:
        filename = remove_ignored_strings_from_filename(
            filename, options['ignored_strings'])

    if not filename:
        return None

    tokenizer = Tokenizer(filename, options)
    if not tokenizer.tokenize():
        return None

    parser = Parser(options)
    if not parser.parse():
        return None

    return Elements.get_dictionary()
Exemple #10
0
def match_multi_episode_pattern(word, token):
    """Match an episode range such as ``01-02`` or ``03v2-04v2``.

    ``word`` must be two one- to three-digit numbers joined by ``-``,
    ``~``, ``&`` or ``+``; each number may carry its own ``v``/``V`` plus
    version digit. The range is accepted only when the first number is
    strictly smaller than the second and the first passes
    ``set_episode_number`` with validation. The second number is then
    submitted without validation and every captured version is inserted
    under ``ElementCategory.RELEASE_VERSION``.

    Returns ``True`` when the range was recorded, ``False`` otherwise.
    """
    found = re.match(
        '(\\d{1,3})(?:[vV](\\d))?[-~&+](\\d{1,3})(?:[vV](\\d))?$', word)
    if not found:
        return False

    lower_bound, lower_version, upper_bound, upper_version = found.groups()

    # Avoid matching expressions such as "009-1" or "5-2"
    if int(lower_bound) >= int(upper_bound):
        return False
    if not set_episode_number(lower_bound, token, validate=True):
        return False

    set_episode_number(upper_bound, token, validate=False)
    for version in (lower_version, upper_version):
        if version:
            Elements.insert(ElementCategory.RELEASE_VERSION, version)
    return True
Exemple #11
0
    def parse(self):
        """Run every search pass in order and report whether anything was found.

        Keywords and isolated numbers are always searched first. The
        episode number pass runs only when ``parse_episode_number`` is
        enabled. The anime title pass always runs. The release group pass
        runs when ``parse_release_group`` is enabled and no release group
        has been recorded yet. The episode title pass runs when
        ``parse_episode_title`` is enabled and an episode number is known.
        The elements are validated last.

        Returns ``True`` when ``Elements`` is not empty afterwards.
        """
        self.search_for_keywords()
        self.search_for_isolated_numbers()

        if self.options['parse_episode_number']:
            self.search_for_episode_number()

        self.search_for_anime_title()

        if self.options['parse_release_group']:
            if not Elements.contains(ElementCategory.RELEASE_GROUP):
                self.search_for_release_group()

        if self.options['parse_episode_title']:
            if Elements.contains(ElementCategory.EPISODE_NUMBER):
                self.search_for_episode_title()

        self.validate_elements()

        found_anything = not Elements.empty()
        return found_anything
Exemple #12
0
    def peek(string):
        """Pre-identify a fixed set of keywords by plain substring search.

        For every listed keyword that occurs in ``string``, the keyword is
        inserted into ``Elements`` under its category and the span of its
        first occurrence is remembered. A keyword is searched once, so only
        its first occurrence is considered.

        Returns the ``(begin, end)`` spans, ``end`` exclusive, sorted in
        ascending order.
        """
        known_keywords = (
            (ElementCategory.AUDIO_TERM, ('Dual Audio',)),
            (ElementCategory.VIDEO_TERM, ('H264', 'H.264', 'h264', 'h.264')),
            (ElementCategory.VIDEO_RESOLUTION, ('480p', '720p', '1080p')),
            (ElementCategory.SOURCE, ('Blu-Ray',)),
        )

        spans = []
        for category, keywords in known_keywords:
            for keyword in keywords:
                begin = string.find(keyword)
                if begin == -1:
                    continue  # Keyword does not occur in the string

                Elements.insert(category, keyword)
                spans.append((begin, begin + len(keyword)))

        spans.sort()
        return spans
Exemple #13
0
    def search_for_isolated_numbers(self):
        """Interpret isolated numeric tokens as anime year or video resolution.

        Only unknown tokens that are purely digits and that
        ``parser_helper.is_token_isolated`` accepts are considered. A number
        within ``parser_number.ANIME_YEAR_MIN`` and ``ANIME_YEAR_MAX``
        (inclusive) becomes the anime year if none is recorded yet.
        Otherwise 480, 720 and 1080 become the video resolution if none is
        recorded yet. A token that is used this way is marked as an
        identifier.
        """
        for token in Tokens.get_list(TokenFlags.UNKNOWN):
            if not (token.content.isdigit() and
                    parser_helper.is_token_isolated(token)):
                continue

            number = int(token.content)

            # Anime year
            if parser_number.ANIME_YEAR_MIN <= number <= \
                    parser_number.ANIME_YEAR_MAX and \
                    not Elements.contains(ElementCategory.ANIME_YEAR):
                Elements.insert(ElementCategory.ANIME_YEAR, token.content)
                token.category = TokenCategory.IDENTIFIER
                continue

            # Video resolution: if these numbers are isolated, it's more
            # likely for them to be the video resolution rather than the
            # episode number. Some fansub groups use these without the "p"
            # suffix.
            if number in (480, 720, 1080) and \
                    not Elements.contains(ElementCategory.VIDEO_RESOLUTION):
                Elements.insert(
                    ElementCategory.VIDEO_RESOLUTION, token.content)
                token.category = TokenCategory.IDENTIFIER
Exemple #14
0
    def search_for_episode_number(self):
        """Try progressively weaker strategies to find the episode number.

        Candidates are the unknown tokens that contain a number. The
        alternative-number check in ``Elements`` is switched on when an
        episode number is already recorded. Known episode patterns are
        tried first. If none matches and no episode number is recorded
        yet, the purely numeric candidates are passed to the
        equivalent-number, separated-number and isolated-number searches,
        and finally to the last-number fallback. The search stops at the
        first strategy that succeeds.
        """
        # List all unknown tokens that contain a number
        candidates = [
            token for token in Tokens.get_list(TokenFlags.UNKNOWN)
            if parser_helper.find_number_in_string(token.content) is not None
        ]
        if not candidates:
            return

        already_has_episode = Elements.contains(
            ElementCategory.EPISODE_NUMBER)
        Elements.set_check_alt_number(already_has_episode)

        # A token matching a known episode pattern has to be the episode
        # number. Failing that, an episode number found earlier via
        # keywords is kept as it is.
        if parser_number.search_for_episode_patterns(candidates) or \
                Elements.contains(ElementCategory.EPISODE_NUMBER):
            return

        # From now on, we're only interested in numeric tokens
        numeric_tokens = [
            token for token in candidates if token.content.isdigit()]
        if not numeric_tokens:
            return

        # In order of preference:
        #   equivalent numbers, e.g. "01 (176)", "29 (04)"
        #   separated numbers,  e.g. " - 08"
        #   isolated numbers,   e.g. "[12]", "(2006)"
        if (parser_number.search_for_equivalent_numbers(numeric_tokens) or
                parser_number.search_for_separated_numbers(numeric_tokens) or
                parser_number.search_for_isolated_numbers(numeric_tokens)):
            return

        # Consider using the last number as a last resort
        parser_number.search_for_last_number(numeric_tokens)
Exemple #15
0
def match_type_and_episode_pattern(word, token):
    """Match an anime type fused with an episode number and split the token.

    ``word`` is cut at the position returned by
    ``parser_helper.find_number_in_string``. When the leading part is a
    known ``ANIME_TYPE`` keyword, it is inserted as the anime type. The
    remainder is then tried against the episode patterns and, failing
    that, as a validated episode number. On success ``token`` keeps only
    the number and a new token holding the prefix is inserted at the
    token's former index.

    Returns ``True`` when the token was split, ``False`` otherwise.
    """
    split_at = parser_helper.find_number_in_string(word)
    prefix = word[:split_at]

    keyword = keyword_manager.find(
        keyword_manager.normalize(prefix), ElementCategory.ANIME_TYPE)
    if not keyword:
        return False

    Elements.insert(ElementCategory.ANIME_TYPE, prefix)
    number = word[split_at:]
    if not (match_episode_patterns(number, token) or
            set_episode_number(number, token, validate=True)):
        return False

    # Split token (we do this last in order to avoid invalidating our token
    # reference earlier)
    token_index = Tokens.get_index(token)
    token.content = number
    if keyword.options.identifiable:
        prefix_category = TokenCategory.IDENTIFIER
    else:
        prefix_category = TokenCategory.UNKNOWN
    Tokens.insert(token_index, Token(prefix_category, prefix, token.enclosed))
    return True
Exemple #16
0
 def validate_elements(self):
     """Resolve conflicts between recorded anime types and the episode title.

     Does nothing unless both an anime type and an episode title are
     recorded. For each anime type: when it equals the first episode
     title, the episode title is erased. When it is merely contained in
     that title and is a known ``ANIME_TYPE`` keyword, the anime type is
     removed.
     """
     if not (Elements.contains(ElementCategory.ANIME_TYPE) and
             Elements.contains(ElementCategory.EPISODE_TITLE)):
         return

     # Here we check whether the episode title contains an anime type
     episode_title = Elements.get(ElementCategory.EPISODE_TITLE)[0]

     # Iterate over a copy because entries may be removed along the way
     for anime_type in list(Elements.get(ElementCategory.ANIME_TYPE)):
         if anime_type == episode_title:
             # Invalid episode title
             Elements.erase(ElementCategory.EPISODE_TITLE)
             continue

         if anime_type not in episode_title:
             continue

         normalized = keyword_manager.normalize(anime_type)
         if keyword_manager.find(normalized, ElementCategory.ANIME_TYPE):
             Elements.remove(ElementCategory.ANIME_TYPE, anime_type)
Exemple #17
0
def set_episode_number(number, token, validate):
    """Record ``number`` as an episode number (or its alternative).

    When ``validate`` is true and ``is_valid_episode_number`` rejects the
    number, nothing happens and ``False`` is returned. Otherwise ``token``
    is marked as an identifier.

    When ``Elements.get_check_alt_number()`` is set, ``number`` is compared
    numerically with the first recorded episode number:

    * larger: ``number`` is stored as the alternative episode number;
    * smaller: the recorded number is moved to the alternative category and
      ``number`` becomes the episode number;
    * otherwise: nothing is inserted and ``False`` is returned. The token
      has already been marked as an identifier at that point.

    Returns ``True`` when the number was inserted.
    """
    if validate and not is_valid_episode_number(number):
        return False

    token.category = TokenCategory.IDENTIFIER
    target_category = ElementCategory.EPISODE_NUMBER

    # Handle equivalent numbers
    if Elements.get_check_alt_number():
        # TODO: check if getting only the first episode number is enough
        recorded = Elements.get(ElementCategory.EPISODE_NUMBER)[0]
        new_value = str2int(number)
        recorded_value = str2int(recorded)

        if new_value > recorded_value:
            target_category = ElementCategory.EPISODE_NUMBER_ALT
        elif new_value < recorded_value:
            Elements.remove(ElementCategory.EPISODE_NUMBER, recorded)
            Elements.insert(ElementCategory.EPISODE_NUMBER_ALT, recorded)
        else:
            return False

    Elements.insert(target_category, number)
    return True
Exemple #18
0
def set_alternative_episode_number(number, token):
    """Record ``number`` as an alternative episode number.

    The number is inserted under ``ElementCategory.EPISODE_NUMBER_ALT``
    without any validation and ``token`` is marked as an identifier.
    Always returns ``True``.
    """
    Elements.insert(ElementCategory.EPISODE_NUMBER_ALT, number)
    token.category = TokenCategory.IDENTIFIER
    return True
Exemple #19
0
 def set_anime_season(first, second, content):
     """Record ``content`` as the anime season and mark both tokens as used.

     ``content`` is inserted under ``ElementCategory.ANIME_SEASON``, then
     ``first`` and ``second`` are re-categorised, in that order, as
     ``TokenCategory.IDENTIFIER``.
     """
     Elements.insert(ElementCategory.ANIME_SEASON, content)
     for token in (first, second):
         token.category = TokenCategory.IDENTIFIER