def search_for_release_group(self):
    """Look for a release group among the enclosed tokens and build it.

    Walks the enclosed unknown tokens in order.  A candidate is accepted
    when the next bracket-or-identifier token after it is a bracket and
    the nearest preceding non-delimiter token is either absent or itself
    a bracket.  The accepted span is handed to
    ``parser_helper.build_element`` as ``ElementCategory.RELEASE_GROUP``
    with delimiters kept.  Returns ``None`` in every case; when no
    candidate qualifies nothing is built.
    """
    candidate_flags = TokenFlags.ENCLOSED | TokenFlags.UNKNOWN
    boundary_flags = TokenFlags.BRACKET | TokenFlags.IDENTIFIER

    group_last = None
    while True:
        # First pass starts from the beginning of the token list; later
        # passes resume after the boundary found by the previous pass.
        if group_last:
            group_first = Tokens.find_next(group_last, candidate_flags)
        else:
            group_first = Tokens.find(candidate_flags)
        if group_first is None:
            return

        # The candidate span runs up to the next bracket or identifier.
        group_last = Tokens.find_next(group_first, boundary_flags)
        if group_last is None:
            return

        if group_last.category == TokenCategory.BRACKET:
            # Only accept the candidate when it is the first
            # non-delimiter token of its group.
            before = Tokens.find_previous(
                group_first, TokenFlags.NOT_DELIMITER)
            if before is None or before.category == TokenCategory.BRACKET:
                break

    # The boundary is a bracket, so step back to the previous valid token
    # to get the last token that belongs to the element.
    group_last = Tokens.find_previous(group_last, TokenFlags.VALID)
    parser_helper.build_element(
        ElementCategory.RELEASE_GROUP, group_first, group_last,
        keep_delimiters=True)
def search_for_episode_title(self):
    """Look for an episode title among the non-enclosed tokens and build it.

    Walks the non-enclosed unknown tokens in order.  Each candidate span
    runs from such a token up to the next bracket or identifier token (or
    to the end of the token list when there is none).  Spans that consist
    only of a dash are skipped.  The first remaining span is handed to
    ``parser_helper.build_element`` as ``ElementCategory.EPISODE_TITLE``
    with delimiters not kept.  Returns ``None`` in every case.
    """
    token_end = None
    while True:
        # Find the first non-enclosed unknown token
        if token_end:
            token_begin = Tokens.find_next(
                token_end, TokenFlags.NOT_ENCLOSED | TokenFlags.UNKNOWN)
        else:
            token_begin = Tokens.find(
                TokenFlags.NOT_ENCLOSED | TokenFlags.UNKNOWN)
        if token_begin is None:
            return

        # Continue until a bracket or identifier is found
        token_end = Tokens.find_next(
            token_begin, TokenFlags.BRACKET | TokenFlags.IDENTIFIER)

        # Ignore if it's only a dash
        if Tokens.distance(token_begin, token_end) <= 2 and \
                parser_helper.is_dash_character(token_begin.content):
            if token_end is None:
                # Nothing follows the dash.  Looping again would restart
                # the search from the beginning, find this same dash and
                # never terminate, so stop here instead.
                return
            continue

        # If token end is a bracket, then we get the previous token to be
        # included in the element
        if token_end and token_end.category == TokenCategory.BRACKET:
            token_end = Tokens.find_previous(token_end, TokenFlags.VALID)

        # Build episode title
        parser_helper.build_element(
            ElementCategory.EPISODE_TITLE, token_begin, token_end,
            keep_delimiters=False)
        return
def search_for_anime_title(self):
    """Look for the anime title and build it as an element.

    The title normally starts at the first non-enclosed unknown token.
    When there is none, the title is assumed to be enclosed: the unknown
    tokens of the bracketed groups are scanned, the first group is
    skipped (assumed to be the release group), and groups whose content
    is not mostly Latin are skipped as well.

    The end of the title is the next identifier token (or the next
    bracket, for an enclosed title).  For a non-enclosed title the end is
    then pulled back past an unmatched open bracket and past trailing
    enclosed groups, except those ending with ``)``.  The resulting span
    is handed to ``parser_helper.build_element`` as
    ``ElementCategory.ANIME_TITLE`` with delimiters not kept.  Returns
    ``None`` in every case; when no starting token is found nothing is
    built.
    """
    enclosed_title = False

    # Find the first non-enclosed unknown token
    token_begin = Tokens.find(TokenFlags.NOT_ENCLOSED | TokenFlags.UNKNOWN)

    # If that doesn't work, find the first unknown token in the second
    # enclosed group, assuming that the first one is the release group
    if token_begin is None:
        enclosed_title = True
        token_begin = Tokens.get(0)
        skipped_previous_group = False
        while token_begin is not None:
            token_begin = Tokens.find_next(token_begin, TokenFlags.UNKNOWN)
            if token_begin is None:
                break
            # Ignore groups that are composed of non-Latin characters;
            # a Latin group only counts once a group has been skipped.
            if parser_helper.is_mostly_latin_string(token_begin.content) \
                    and skipped_previous_group:
                break  # Found it
            # Get the first unknown token of the next group
            token_begin = Tokens.find_next(token_begin, TokenFlags.BRACKET)
            skipped_previous_group = True

    if token_begin is None:
        return

    # Continue until an identifier (or a bracket, if the title is enclosed)
    # is found
    end_flags = TokenFlags.IDENTIFIER | (
        TokenFlags.BRACKET if enclosed_title else TokenFlags.NONE)
    token_end = Tokens.find_next(token_begin, end_flags)

    if not enclosed_title:
        # If within the interval there's an open bracket without its
        # matching pair, move the upper endpoint back to the bracket
        last_bracket = token_end
        bracket_open = False
        for token in Tokens.get_list(TokenFlags.BRACKET, begin=token_begin,
                                     end=token_end):
            last_bracket = token
            bracket_open = not bracket_open
        if bracket_open:
            token_end = last_bracket

        # If the interval ends with an enclosed group (e.g. "Anime Title
        # [Fansub]"), move the upper endpoint back to the beginning of the
        # group. We ignore parentheses in order to keep certain groups
        # (e.g. "(TV)") intact.
        token = Tokens.find_previous(token_end, TokenFlags.NOT_DELIMITER)
        # The lookups below can come back empty; the None guard keeps the
        # loop condition from dereferencing a missing token.
        while token is not None and \
                token.category == TokenCategory.BRACKET and \
                token.content != ')':
            token = Tokens.find_previous(token, TokenFlags.BRACKET)
            if token is not None:
                token_end = token
                token = Tokens.find_previous(
                    token_end, TokenFlags.NOT_DELIMITER)

    # Token end is a bracket, so we get the previous token to be included
    # in the element
    token_end = Tokens.find_previous(token_end, TokenFlags.VALID)
    parser_helper.build_element(ElementCategory.ANIME_TITLE, token_begin,
                                token_end, keep_delimiters=False)