def search_pairs(item):
    """Build the artist/title combinations to search for.

    Every element of the returned iterable is a pair: the first member
    is an artist name, the second member is a list of song titles.

    Besides the artist and title taken verbatim from `item`, simplified
    candidates are offered as well: the strings with extra information
    such as parenthesized suffixes, featured artists or subtitles
    stripped off. Titles that contain `/` are additionally offered split
    into their individual parts.
    """

    def generate_alternatives(string, patterns):
        """Return `string` followed by the first group of each pattern in
        `patterns` that matches it (case-insensitively), in pattern order.
        """
        found = (re.search(p, string, re.IGNORECASE) for p in patterns)
        return [string] + [m.group(1) for m in found if m]

    title = item.title
    artist = item.artist

    # Artist candidates: the name as given, plus the part that precedes
    # a "featuring" token when there is one.
    artist_patterns = [r"(.*?) {0}".format(plugins.feat_tokens())]
    artists = generate_alternatives(artist, artist_patterns)

    title_patterns = [
        # Drop a parenthesized suffix; common examples include (live),
        # (remix), and (acoustic).
        r"(.+?)\s+[(].*[)]$",
        # Drop featured artists mentioned in the title.
        r"(.*?) {0}".format(plugins.feat_tokens(for_artist=False)),
        # Drop the subtitle that follows a colon.
        r"(.+?)\s*:.*",
    ]
    titles = generate_alternatives(title, title_patterns)

    # A dual song (e.g. Pink Floyd - Speak to Me / Breathe) is offered
    # both as the full string and as the list of its separate parts.
    title_groups = []
    for candidate in titles:
        title_groups.append([candidate])
        pieces = candidate.split('/')
        if len(pieces) > 1:
            title_groups.append([piece.strip() for piece in pieces])

    return itertools.product(artists, title_groups)
def search_pairs(item):
    """Build the artist/title combinations to search for.

    Every element of the returned iterable is a pair: the first member
    is an artist name, the second member is a list of song titles.

    Besides the artist and title taken verbatim from `item`, simplified
    candidates are offered as well: the strings with parenthesized
    suffixes and featured artists stripped off. Titles that contain `/`
    are additionally offered split into their individual parts.
    """

    def first_group(pattern, text):
        """Return a one-element list holding group 1 of `pattern` searched
        in `text` (ignoring case), or an empty list when nothing matches.
        """
        found = re.search(pattern, text, re.IGNORECASE)
        return [found.group(1)] if found else []

    title = item.title
    artist = item.artist

    # Artist candidates: the name as given, plus the part that precedes
    # a "featuring" token when there is one.
    feat_in_artist = r"(.*?) {0}".format(plugins.feat_tokens())
    artists = [artist] + first_group(feat_in_artist, artist)

    # Title candidates: the title as given, plus the title without a
    # parenthesized suffix; common examples include (live), (remix),
    # and (acoustic).
    titles = [title] + first_group(r"(.+?)\s+[(].*[)]$", title)

    # For each title gathered so far, also offer the part that precedes
    # a "featuring" token. The additions are computed up front so that
    # they are not themselves re-examined.
    feat_in_title = r"(.*?) {0}".format(plugins.feat_tokens(for_artist=False))
    without_feat = [
        alt for known in titles for alt in first_group(feat_in_title, known)
    ]
    titles.extend(without_feat)

    # A dual song (e.g. Pink Floyd - Speak to Me / Breathe) is offered
    # both as the full string and as the list of its separate parts.
    title_groups = []
    for candidate in titles:
        title_groups.append([candidate])
        pieces = candidate.split('/')
        if len(pieces) > 1:
            title_groups.append([piece.strip() for piece in pieces])

    return itertools.product(artists, title_groups)
def split_on_feat(artist):
    """Separate the "main" artist from whatever follows the first
    "feat"-style token in `artist`.

    Return a pair `(main, featured)`. `main` is always a string;
    `featured` is the text after the token, or None when `artist`
    contains no such token. Both parts have surrounding whitespace
    stripped.
    """
    feat_re = re.compile(plugins.feat_tokens(), re.IGNORECASE)

    # Splitting at most once leaves either one piece (no token found)
    # or exactly two (text before and after the first token).
    pieces = tuple(
        piece.strip() for piece in feat_re.split(artist, maxsplit=1)
    )
    if len(pieces) > 1:
        return pieces
    return pieces[0], None
def __init__(self, name=None):
    """Initialize the plugin: register configuration defaults and
    precompile the regular expressions stored on the instance.

    `name` is forwarded unchanged to the parent initializer.
    """
    super().__init__(name=name)

    self.config.add({
        "artist_fields": "artist artist_credit artists",
        "check_fields":
            "artist artist_credit artists albumartist albumartist_credit",
        "check_query": "^comp:1",
        "check_single_track": True,
        "sections": "",
    })

    # Take the artist-oriented "feat" pattern and remove its `|and` and
    # `|\&` alternatives; `and`, `&` and `,` are matched by the separate
    # `artist_tokens` pattern below. The second literal is a raw string
    # so the backslash is kept verbatim: the non-raw spelling "|\&" is
    # an invalid escape sequence, which newer Python versions warn about.
    feat_pattern = plugins.feat_tokens(for_artist=True)
    for alternative in ("|and", r"|\&"):
        feat_pattern = feat_pattern.replace(alternative, "")
    self.feat_tokens = re.compile(feat_pattern)

    # Matches `and`, `&` or `,` only when whitespace sits directly on
    # both sides (lookbehind and lookahead, so the whitespace itself is
    # not part of the match).
    self.artist_tokens = re.compile(r"(?<=\s)(?:and|\&|,)(?=\s)")

    self.asciify = library.DefaultTemplateFunctions.tmpl_asciify
def contains_feat(title):
    """Determine whether the title contains a "featured" marker.

    Return True when a "feat"-style token occurs anywhere in `title`
    (matched case-insensitively), False otherwise.
    """
    # Ask for the title token set rather than the artist default, so
    # that words which only join artist names are not mistaken for a
    # "featured" marker inside a song title.
    # NOTE(review): assumes `for_artist=False` yields the narrower,
    # title-appropriate set -- confirm against `plugins.feat_tokens`.
    title_tokens = plugins.feat_tokens(for_artist=False)
    return bool(re.search(title_tokens, title, flags=re.IGNORECASE))