def parse_artists(soup, track, title):
    """
    Parse the per-track artists from the track markup or the page header.

    Release-level names are read from the breadcrumb links whose href
    contains "/artists/"; if none are found, the title-cased names from the
    product heading are used instead. Each name is split with ``re_split``.
    If the track carries a ``meta[itemprop="byArtist"]`` tag, its content is
    handed to ``split_artists`` and replaces the release-level names;
    otherwise the release-level names are all credited as "main".
    Names following "feat" in the title are appended as guests.

    Returns a list of ``(name, role)`` tuples.
    """
    # Links without an href or without a single text node are skipped rather
    # than raising KeyError / AttributeError.
    release_names = [
        link.string
        for link in soup.select("#topbar_bread h1 a")
        if "/artists/" in link.get("href", "") and link.string
    ] or [
        link.string.title()
        for link in soup.select("#product_heading_artist a")
        if link.string
    ]
    names = [part for raw in release_names for part in re_split(raw)]

    try:
        artists = split_artists(
            track.select('meta[itemprop="byArtist"]')[0]["content"], names
        )
    except (TypeError, IndexError, KeyError):
        # No usable per-track artist tag: fall back to the release artists.
        artists = [(name, "main") for name in names]

    guests = re.search(r"[Ff]eat\.? ([^\)]+)", title)
    if guests:
        # Drop a trailing "... mix" / "... edit" descriptor that the capture
        # may have picked up after the guest names.
        artists += [
            (
                re.sub(r"( -)? .+? (mix|edit)", "", guest, flags=re.IGNORECASE),
                "guest",
            )
            for guest in re_split(guests[1])
        ]
    return artists
def parse_artists(self, artists, default_artists, title):
    """
    Build the list of ``(name, role)`` artist tuples for a track.

    Guests named in the title (per ``RE_FEAT``) come first. When ``artists``
    is truthy, its "mainartist" (or "main_artist") entries are credited as
    "main" and its "featuredartist" entries as "guest"; otherwise the
    "ART_NAME" of every entry in ``default_artists`` is credited as "main".
    Names from these sources are split with ``re_split`` and added only if
    the same (name, role) pair is not already present.
    """
    collected = []

    def _credit(raw_names, role):
        # Split every raw name and append the pairs not seen so far.
        for raw in raw_names:
            for name in re_split(raw):
                pair = (name, role)
                if pair not in collected:
                    collected.append(pair)

    title_feat = RE_FEAT.search(title)
    if title_feat:
        collected.extend(
            (unescape(guest), "guest") for guest in re_split(title_feat[1])
        )

    if not artists:
        _credit((entry["ART_NAME"] for entry in default_artists), "main")
        return collected

    _credit(artists.get("mainartist") or artists.get("main_artist", []), "main")
    _credit(artists.get("featuredartist", []), "guest")
    return collected
def split_genres(genres_list):
    """
    Return the unique genres obtained by splitting every entry of the list.

    Each entry is split with ``re_split`` and every piece is stripped of
    surrounding whitespace. A falsy ``genres_list`` yields an empty list.
    The order of the returned list is not defined.
    """
    if not genres_list:
        return []
    unique = {
        piece.strip() for entry in genres_list for piece in re_split(entry)
    }
    return list(unique)
def split_artists(artist, rls_artists):
    """
    Split an artist string by the known delimiter characters.

    Every non-empty fragment returned by ``re_split`` is stripped and
    credited as a "main" artist, in the order found.

    ``rls_artists`` is accepted for backward compatibility with existing
    callers; no matching against the release artists is performed.

    Returns a list of ``(name, "main")`` tuples.
    """
    stripped = (
        fragment.strip() for fragment in re_split(artist) if fragment
    )
    # Filter again after stripping so whitespace-only fragments do not end up
    # as empty artist names.
    return [(name, "main") for name in stripped if name]
def parse_artists(artist, title):
    """
    Build the artist credits for a track from its title and artist string.

    Names captured by ``RE_FEAT`` in the title are credited as guests. When
    the title contains " - ", the text before the first " - " is split into
    the main artists. Otherwise (or if that split raises IndexError or
    TypeError) the ``artist`` argument is split instead, unless it contains
    "various" (case-insensitive), in which case no main artist is added.
    """
    featured = RE_FEAT.search(title)
    if featured:
        found = [(name, "guest") for name in re_split(featured[1])]
    else:
        found = []

    mains = None
    if " - " in title:
        try:
            prefix = title.split(" - ", 1)[0]
            mains = [(name, "main") for name in re_split(prefix)]
        except (IndexError, TypeError):
            mains = None

    if mains is None:
        # No artist prefix in the title: fall back to the given artist string.
        if "various" in artist.lower():
            mains = []
        else:
            mains = [(name, "main") for name in re_split(artist)]

    found += mains
    return found
def parse_artists(artist_list):
    """
    Split the artists by common split characters, and also accommodate
    features and remixers.

    ``artist_list`` may be a single string or an iterable of strings; a falsy
    value is replaced by the placeholder string "none". For every entry, the
    names captured by ``RE_FEAT`` are credited as "guest", the names following
    "remix" / "remix." / "remixed" / "remixed by" are credited as "remixer",
    and whatever remains is credited as "main".

    Returns a list of ``(name, role)`` tuples.
    """
    if not artist_list:
        artist_list = "none"
    if isinstance(artist_list, str):
        artist_list = [artist_list]

    artists = []
    for artist in artist_list:
        feat = RE_FEAT.search(artist)
        if feat:
            artists.extend((name, "guest") for name in re_split(feat[1]))
            artist = artist.replace(feat[0], "")

        # Alternatives are tried left to right, so the longest suffix must
        # come first; otherwise "remixed by X" matches "ed" and captures
        # "by X" as the remixer name.
        remix = re.search(r" \(?remix(?:ed by|ed|\.)? ([^\)]+)\)?", artist)
        if remix:
            artists.extend((name, "remixer") for name in re_split(remix[1]))
            artist = artist.replace(remix[0], "")

        artists.extend((name, "main") for name in re_split(artist))
    return artists
def filter_results(results, artists, album):
    """
    Return the subset of ``results`` that matches the wanted artists and album.

    ``results`` maps a release id to a sequence whose first item exposes
    ``artist`` and ``album`` attributes. A falsy ``results`` yields ``{}``.

    When ``artists`` is truthy, a result is dropped if:
      * its artist contains "Various" and exactly one artist was requested, or
      * none of the normalised, split requested names is a substring of the
        normalised release artist, or
      * none of the words of the requested names equals a whole word of the
        normalised release artist.
    When ``album`` is truthy, a result is also dropped if
    ``_compare_albums(album, <result album>)`` is falsy.
    """
    if not results:
        return {}

    # The requested names do not depend on the result, so normalise and split
    # them once instead of once per result.
    wanted_names = []
    if artists:
        for artist in artists:
            wanted_names += re_split(re_strip(normalize_accents(artist)))
    wanted_words = [word for name in wanted_names for word in name.split()]

    filtered = {}
    for rls_id, result in results.items():
        if artists:
            rls_artist = re_strip(normalize_accents(result[0].artist))
            if "Various" in result[0].artist:
                if len(artists) == 1:
                    continue
            elif not any(name in rls_artist for name in wanted_names):
                continue
            else:
                # A substring hit is not enough; require a whole-word match.
                rls_words = rls_artist.split()
                if not any(word in rls_words for word in wanted_words):
                    continue
        if album and not _compare_albums(album, result[0].album):
            continue
        filtered[rls_id] = result
    return filtered
def parse_artists(self, artists, title, track_id):  # noqa: C901
    """
    Iterate over all artists and roles, returning a compliant list of
    artist tuples.

    ``artists`` is a list of dicts with "name" and "type" keys. Guests named
    in the title (per ``RE_FEAT``) are credited first. Each artist name is
    split with ``re_split``; guests embedded in a name are credited as
    "guest", and the remaining name is credited as "remixer" if it appears in
    the title's "(... Remix)" text, as "main" if every artist is of type
    "FEATURED", or with ``ROLES[type]`` otherwise. Artists whose type is not
    in ``ROLES`` or whose name was already credited are skipped.

    When the title contains "mix", the track's contributors are requested
    (up to four attempts on ``ScrapeError``) and every contributor with role
    "Remixer" that is not yet credited is added as "remixer".

    If nothing was credited at all, every entry of ``artists`` is returned as
    "main".
    """
    result = []
    artist_set = set()

    def _credit(name, role):
        result.append((name, role))
        artist_set.add(name.lower())

    feat = RE_FEAT.search(title)
    if feat:
        for guest in re_split(feat[1]):
            _credit(unescape(guest), "guest")

    remix_str = ""
    remixer_str = re.search(r" \((.*) [Rr]emix\)", title)
    if remixer_str:
        remix_str = unescape(remixer_str[1]).lower()

    all_guests = all(a["type"] == "FEATURED" for a in artists)
    for artist in artists:
        for part in re_split(artist["name"]):
            feat = RE_FEAT.search(part)
            if feat:
                for guest in re_split(feat[1]):
                    _credit(unescape(guest), "guest")
                # The matched text is literal and may contain regex
                # metacharacters such as "(" or ".", so escape it.
                part = re.sub(re.escape(feat[0]) + "$", "", part).rstrip()

            name = unescape(part)
            # An empty name (nothing left after removing the guests) would
            # count as contained in any remix string, so skip it.
            if not name:
                continue
            if artist["type"] not in ROLES or name.lower() in artist_set:
                continue

            if name.lower() in remix_str:
                role = "remixer"
            elif all_guests:
                role = "main"
            else:
                role = ROLES[artist["type"]]
            _credit(name, role)

    # Kept separate from ``artists`` so that a failed request cannot make the
    # loop below read "role" from the original artist dicts, and so that the
    # failsafe at the end still returns the original artists.
    contributors = []
    if "mix" in title.lower():  # Get contributors for (re)mixes.
        for _ in range(4):
            try:
                contributors = self.get_json_sync(
                    f"/tracks/{track_id}/contributors",
                    params={"countryCode": self.country_code, "limit": 25},
                )["items"]
                break
            except ScrapeError:
                continue

    for contributor in contributors:
        name = unescape(contributor["name"])
        if contributor["role"] == "Remixer" and name.lower() not in artist_set:
            _credit(name, "remixer")

    # Failsafe: if no artist could be credited, return all artists as main.
    if result:
        return result
    return [(unescape(a["name"]), "main") for a in artists]