예제 #1
0
파일: parser.py 프로젝트: kayabe/anitopy
    def search_for_release_group(self):
        """Find the release group and build it as an element.

        Candidates are enclosed unknown tokens. A candidate is accepted when
        the next bracket-or-identifier token after it is a bracket and the
        closest non-delimiter token before it is either missing or a bracket.
        The accepted span is passed to ``parser_helper.build_element`` as
        ``ElementCategory.RELEASE_GROUP`` with delimiters kept. The method
        always returns ``None``.
        """
        candidate_flags = TokenFlags.ENCLOSED | TokenFlags.UNKNOWN
        stop_flags = TokenFlags.BRACKET | TokenFlags.IDENTIFIER

        closing = None
        while True:
            # Resume after the previous stop token, or start a fresh search
            opening = (Tokens.find_next(closing, candidate_flags) if closing
                       else Tokens.find(candidate_flags))
            if opening is None:
                return

            # The candidate runs up to the next bracket or identifier
            closing = Tokens.find_next(opening, stop_flags)
            if closing is None:
                return
            if closing.category != TokenCategory.BRACKET:
                continue

            # The candidate must be the first non-delimiter token of its group
            leading = Tokens.find_previous(opening, TokenFlags.NOT_DELIMITER)
            if leading is not None:
                if leading.category != TokenCategory.BRACKET:
                    continue

            # The stop token is a bracket; step back so the element ends on
            # the last valid token before it
            closing = Tokens.find_previous(closing, TokenFlags.VALID)
            parser_helper.build_element(
                ElementCategory.RELEASE_GROUP,
                opening,
                closing,
                keep_delimiters=True)
            return
예제 #2
0
파일: parser.py 프로젝트: kayabe/anitopy
    def search_for_episode_title(self):
        """Find the episode title and build it as an element.

        Candidates are non-enclosed unknown tokens. A candidate runs up to the
        next bracket or identifier token (or has no terminating token when
        none is found). Candidates that consist of just a dash are skipped.
        The first remaining candidate is passed to
        ``parser_helper.build_element`` as ``ElementCategory.EPISODE_TITLE``
        without delimiters. The method always returns ``None``.
        """
        token_end = None
        while True:
            # Find the first non-enclosed unknown token
            if token_end is not None:
                token_begin = Tokens.find_next(
                    token_end, TokenFlags.NOT_ENCLOSED | TokenFlags.UNKNOWN)
            else:
                token_begin = Tokens.find(
                    TokenFlags.NOT_ENCLOSED | TokenFlags.UNKNOWN)
            if token_begin is None:
                return

            # Continue until a bracket or identifier is found
            token_end = Tokens.find_next(
                token_begin, TokenFlags.BRACKET | TokenFlags.IDENTIFIER)

            # Ignore if it's only a dash
            if Tokens.distance(token_begin, token_end) <= 2 and \
                    parser_helper.is_dash_character(token_begin.content):
                if token_end is None:
                    # There is no token to resume from. Looping again would
                    # restart the search from the beginning, pick the same
                    # dash and never terminate.
                    return
                continue

            # If token end is a bracket, then we get the previous token to be
            # included in the element
            if token_end is not None and \
                    token_end.category == TokenCategory.BRACKET:
                token_end = Tokens.find_previous(token_end, TokenFlags.VALID)
            # Build episode title
            parser_helper.build_element(
                ElementCategory.EPISODE_TITLE, token_begin, token_end,
                keep_delimiters=False)
            return
예제 #3
0
def search_for_equivalent_numbers(tokens):
    """Look for an episode number followed by a bracketed alternative number.

    For each token in ``tokens`` that is not isolated and holds a valid
    episode number, the next non-delimiter token must be a bracket and the
    first enclosed non-delimiter token after that bracket must be an
    isolated, all-digit, valid episode number. When such a pair is found,
    the smaller value is set as the episode number and the larger one as the
    alternative episode number.

    Args:
        tokens: Iterable of candidate tokens.

    Returns:
        ``True`` as soon as one pair has been recorded, ``False`` if no
        candidate qualifies (including when ``tokens`` is empty).
    """
    for token in tokens:
        if parser_helper.is_token_isolated(token) or \
                not is_valid_episode_number(token.content):
            continue

        # Find the first enclosed, non-delimiter token
        next_token = Tokens.find_next(token, TokenFlags.NOT_DELIMITER)
        if next_token is None or next_token.category != TokenCategory.BRACKET:
            continue
        next_token = Tokens.find_next(
            next_token, TokenFlags.ENCLOSED | TokenFlags.NOT_DELIMITER)
        # The bracket may be the last meaningful token, in which case there
        # is nothing enclosed to inspect
        if next_token is None or \
                next_token.category != TokenCategory.UNKNOWN:
            continue

        # Check if it's an isolated number
        if not parser_helper.is_token_isolated(next_token) or \
                not next_token.content.isdigit() or \
                not is_valid_episode_number(next_token.content):
            continue

        # The lower number is the episode, the higher one the alternative
        episode = min(token, next_token, key=lambda t: int(t.content))
        alt_episode = max(token, next_token, key=lambda t: int(t.content))

        set_episode_number(episode.content, episode, validate=False)
        set_alternative_episode_number(alt_episode.content, alt_episode)

        return True

    return False
예제 #4
0
def number_comes_before_another_number(token):
    """Handle a number followed by ``&`` or ``of`` and another number.

    The non-delimiter token after ``token`` must have the content ``'&'`` or
    ``'of'``, and the non-delimiter token after that must be all digits.
    When both hold, the content of ``token`` is set as an episode number;
    for ``'&'`` the second number is set as an episode number too. The
    separator and the second number are then marked as identifiers.

    Returns:
        ``True`` if the pattern matched, ``False`` otherwise.
    """
    separator_token = Tokens.find_next(token, TokenFlags.NOT_DELIMITER)
    if not separator_token:
        return False

    separator = separator_token.content
    if separator not in ('&', 'of'):
        return False

    other_token = Tokens.find_next(separator_token, TokenFlags.NOT_DELIMITER)
    if not other_token or not other_token.content.isdigit():
        return False

    set_episode_number(token.content, token, validate=False)
    if separator == '&':
        # The second number is recorded with `token` as the token argument
        set_episode_number(other_token.content, token, validate=False)

    for consumed in (separator_token, other_token):
        consumed.category = TokenCategory.IDENTIFIER
    return True
예제 #5
0
def is_token_isolated(token):
    """Tell whether ``token`` sits alone between two brackets.

    The closest non-delimiter token on each side of ``token`` must exist and
    be a bracket. A missing neighbour on either side means the token is not
    isolated.

    Returns:
        ``True`` if both neighbours are brackets, ``False`` otherwise.
    """
    previous_token = Tokens.find_previous(token, TokenFlags.NOT_DELIMITER)
    if previous_token is None or \
            previous_token.category != TokenCategory.BRACKET:
        return False

    next_token = Tokens.find_next(token, TokenFlags.NOT_DELIMITER)
    if next_token is None or next_token.category != TokenCategory.BRACKET:
        return False

    return True
예제 #6
0
def check_anime_season_keyword(token):
    """Try to read a season number next to a season keyword token.

    Two layouts are tried in order: the previous non-delimiter token whose
    content ``get_number_from_ordinal`` converts to a truthy value, then the
    next non-delimiter token whose content is all digits. On a match the
    value is inserted as ``ElementCategory.ANIME_SEASON`` and both tokens
    involved are marked as identifiers.

    Returns:
        ``True`` if a season was recorded, ``False`` otherwise.
    """
    def register_season(value, token_a, token_b):
        # Store the season, then mark both tokens as identifiers
        Elements.insert(ElementCategory.ANIME_SEASON, value)
        token_a.category = TokenCategory.IDENTIFIER
        token_b.category = TokenCategory.IDENTIFIER

    # Layout 1: ordinal before the keyword
    preceding = Tokens.find_previous(token, TokenFlags.NOT_DELIMITER)
    ordinal = get_number_from_ordinal(preceding.content) if preceding else None
    if ordinal:
        register_season(ordinal, preceding, token)
        return True

    # Layout 2: plain digits after the keyword
    following = Tokens.find_next(token, TokenFlags.NOT_DELIMITER)
    if not following or not following.content.isdigit():
        return False

    register_season(following.content, token, following)
    return True
예제 #7
0
def check_extent_keyword(category, token):
    """Read the number that follows an episode or volume keyword token.

    The next non-delimiter token after ``token`` must exist, be of category
    ``TokenCategory.UNKNOWN`` and contain a number. Its content is first
    offered to the pattern matcher for ``category``; if no pattern matches,
    it is set directly as the episode or volume number without validation.
    On success ``token`` itself is marked as an identifier.

    Args:
        category: ``ElementCategory.EPISODE_NUMBER`` or
            ``ElementCategory.VOLUME_NUMBER``; any other value yields
            ``False``.
        token: The keyword token.

    Returns:
        ``True`` if a number was recorded, ``False`` otherwise.
    """
    next_token = Tokens.find_next(token, TokenFlags.NOT_DELIMITER)

    # Check for a missing token before touching its attributes
    if next_token is None or next_token.category != TokenCategory.UNKNOWN:
        return False
    if parser_helper.find_number_in_string(next_token.content) is None:
        return False

    if category == ElementCategory.EPISODE_NUMBER:
        if not match_episode_patterns(next_token.content, next_token):
            set_episode_number(next_token.content,
                               next_token,
                               validate=False)
    elif category == ElementCategory.VOLUME_NUMBER:
        if not match_volume_patterns(next_token.content, next_token):
            set_volume_number(next_token.content,
                              next_token,
                              validate=False)
    else:
        return False

    token.category = TokenCategory.IDENTIFIER
    return True
예제 #8
0
파일: parser.py 프로젝트: kayabe/anitopy
    def search_for_anime_title(self):
        """Find the anime title and build it as an element.

        The title starts at the first non-enclosed unknown token. If there is
        none, it starts at the first mostly-Latin unknown token found after
        at least one group has been skipped, and the title is treated as
        enclosed. The title ends at the next identifier (or bracket, for an
        enclosed title). For a non-enclosed title the end is then pulled back
        past an unmatched open bracket and past trailing enclosed groups that
        do not close with ``)``. The resulting span is passed to
        ``parser_helper.build_element`` as ``ElementCategory.ANIME_TITLE``
        without delimiters. The method always returns ``None``.
        """
        enclosed_title = False

        # Find the first non-enclosed unknown token
        token_begin = Tokens.find(TokenFlags.NOT_ENCLOSED | TokenFlags.UNKNOWN)

        # If that doesn't work, find the first unknown token in the second
        # enclosed group, assuming that the first one is the release group
        if token_begin is None:
            enclosed_title = True
            token_begin = Tokens.get(0)
            skipped_previous_group = False
            while token_begin is not None:
                token_begin = Tokens.find_next(token_begin, TokenFlags.UNKNOWN)
                if token_begin is None:
                    break
                # Ignore groups that are composed of non-Latin characters
                if parser_helper.is_mostly_latin_string(token_begin.content):
                    if skipped_previous_group:
                        break  # Found it
                # Get the first unknown token of the next group
                token_begin = Tokens.find_next(token_begin, TokenFlags.BRACKET)
                skipped_previous_group = True

        if token_begin is None:
            return

        # Continue until an identifier (or a bracket, if the title is enclosed)
        # is found
        token_end = Tokens.find_next(
            token_begin, TokenFlags.IDENTIFIER | (
                TokenFlags.BRACKET if enclosed_title else TokenFlags.NONE
            ))

        # If within the interval there's an open bracket without its matching
        # pair, move the upper endpoint back to the bracket
        if not enclosed_title:
            last_bracket = token_end
            bracket_open = False
            for token in Tokens.get_list(TokenFlags.BRACKET, begin=token_begin,
                                         end=token_end):
                last_bracket = token
                bracket_open = not bracket_open
            if bracket_open:
                token_end = last_bracket

        # If the interval ends with an enclosed group (e.g. "Anime Title
        # [Fansub]"), move the upper endpoint back to the beginning of the
        # group. We ignore parentheses in order to keep certain groups (e.g.
        # "(TV)") intact.
        if not enclosed_title:
            token = Tokens.find_previous(token_end, TokenFlags.NOT_DELIMITER)
            # Either lookup below can come back empty (e.g. a closing bracket
            # with no earlier bracket), so the loop condition must tolerate a
            # missing token instead of dereferencing it
            while token is not None and \
                    token.category == TokenCategory.BRACKET and \
                    token.content != ')':
                token = Tokens.find_previous(token, TokenFlags.BRACKET)
                if token is not None:
                    token_end = token
                    token = Tokens.find_previous(
                        token_end, TokenFlags.NOT_DELIMITER)

        # Step back from the end token to the previous valid token so that
        # the end token itself is not part of the element
        token_end = Tokens.find_previous(token_end, TokenFlags.VALID)
        parser_helper.build_element(ElementCategory.ANIME_TITLE, token_begin,
                                    token_end, keep_delimiters=False)
예제 #9
0
 def find_next_valid_token(token):
     """Return the result of ``Tokens.find_next`` for ``TokenFlags.VALID``.

     Thin convenience wrapper; ``token`` is forwarded unchanged as the
     starting point of the search.
     """
     following = Tokens.find_next(token, TokenFlags.VALID)
     return following