def getProfile(self):
    """Return an empty, default-constructed artist profile.

    No profile fields are extracted from ``self.bsdata`` here; the result
    is always ``artistDBProfileClass()`` built with no arguments.

    Returns:
        artistDBProfileClass: a profile created with no arguments.
    """
    return artistDBProfileClass()
def getProfile(self):
    """Build the artist profile from the "profile" div and the facet lists.

    General data pairs each ``div.head`` inside ``div.profile`` with the
    ``div.content`` at the same position. The key is the head text with its
    last character removed; the value is a list of ``artistDBLinkClass``
    objects (one per anchor) or, when the content has no anchors, a single
    ``artistDBTextClass``. If the head and content counts differ, or the
    profile div is absent, no general data is collected.

    Extra data is a two-level dict built from every anchor found under
    ``ul.facets_nav`` that carries both ``data-credit-type`` and
    ``data-credit-subtype`` attributes; it is ``None`` when nothing matched.

    Returns:
        artistDBProfileClass: built with ``general`` and ``extra``.
    """

    def toValues(tag):
        # Anchors win over plain text when the tag contains any.
        refs = tag.findAll("a")
        if len(refs) == 0:
            return [artistDBTextClass(tag)]
        return [artistDBLinkClass(ref) for ref in refs]

    generalData = {}
    profileDiv = self.bsdata.find("div", {"class": "profile"})
    # find() returns None when the div is missing; treat that as "no data"
    # instead of failing on the attribute access.
    if profileDiv is not None:
        heads = profileDiv.findAll("div", {"class": "head"})
        contents = profileDiv.findAll("div", {"class": "content"})
        if len(heads) == len(contents):
            for head, body in zip(heads, contents):
                key = head.text[:-1] if isinstance(head.text, str) else None
                generalData[key] = toValues(body)

    extraData = {}
    for ul in self.bsdata.findAll("ul", {"class": "facets_nav"}):
        for ref in ul.findAll("a"):
            dctype = ref.attrs.get('data-credit-type')
            dcsubtype = ref.attrs.get('data-credit-subtype')
            if not all([dctype, dcsubtype]):
                continue
            extraData.setdefault(dctype, {})[dcsubtype] = artistDBLinkClass(ref)
    extraData = extraData if len(extraData) > 0 else None

    return artistDBProfileClass(general=generalData, extra=extraData)
def getProfile(self):
    """Return a profile in which every field is passed as ``None``.

    Nothing is read from the parsed page; ``profile``, ``aliases``,
    ``members``, ``groups``, ``sites`` and ``variations`` are all ``None``.

    Returns:
        artistDBProfileClass: built with all six keyword arguments ``None``.
    """
    return artistDBProfileClass(
        profile=None,
        aliases=None,
        members=None,
        groups=None,
        sites=None,
        variations=None,
    )
def getProfile(self):
    """Build the artist profile from the info box and related-artist blocks.

    Each ``div.detailRow`` inside ``div.artistTopBox.info`` contributes one
    entry: the key is the stripped text of the row's first ``span`` (with a
    leading "/" removed), the value is the list of ``artistDBLinkClass``
    objects for the row's anchors. Rows without a span or without anchors
    are skipped. The "Genres" row goes to ``genres`` instead of ``general``.

    ``extra`` is the list of links taken from the first anchor of each
    ``div.artistBlock`` under ``div.relatedArtists``; it is ``None`` when
    that container is absent. ``tags`` is always ``None``.

    Returns:
        artistDBProfileClass: built with ``general``, ``genres``, ``tags``
        and ``extra``.
    """
    general = {}
    genres = None

    infoBox = self.bsdata.find("div", {"class": "artistTopBox info"})
    rows = [] if infoBox is None else infoBox.findAll("div", {"class": "detailRow"})
    for detail in rows:
        label = detail.find("span")
        if label is None:
            continue

        name = None if label.text is None else label.text.strip()
        if isinstance(name, str) and name.startswith("/"):
            name = name[1:].strip()

        anchors = detail.findAll("a")
        if len(anchors) == 0:
            continue

        links = None
        if isinstance(anchors, list):
            links = [artistDBLinkClass(anchor) for anchor in anchors]

        if name == "Genres":
            genres = links
        else:
            general[name] = links

    related = None
    relatedBox = self.bsdata.find("div", {"class": "relatedArtists"})
    if relatedBox is not None:
        firstAnchors = [
            block.find("a")
            for block in relatedBox.findAll("div", {"class": "artistBlock"})
        ]
        related = [
            artistDBLinkClass(anchor)
            for anchor in firstAnchors
            if anchor is not None
        ]

    if len(general) == 0:
        general = None

    return artistDBProfileClass(general=general,
                                genres=genres,
                                tags=None,
                                extra=related)
def getData(self, inputdata):
    """Assemble an artist record from parsed LastFM API data.

    After ``getDataBase``/``checkData`` have run, a non-``None``
    ``self.dbdata`` is returned as is. Otherwise ``self.bsdata`` must be a
    dict with "Tracks" and "Albums" lists. Artist name, URL and MBID are
    taken from the first track, or from the first album when there are no
    tracks.

    Tracks with more than 50 counts and albums with more than 25 counts are
    kept, then each group is cut at the count of its 1000th most-counted
    entry (ties at that count are all kept). Each surviving entry gets a
    code derived from the md5 of its name and URL, modulo 10**7.

    Args:
        inputdata: passed through to ``self.getDataBase``.

    Returns:
        ``self.dbdata`` when set; ``None`` when both the track and the
        album lists are empty; otherwise an ``artistDBDataClass``.

    Raises:
        ValueError: if ``self.bsdata`` is not a dict.
    """
    self.getDataBase(inputdata)
    self.checkData()

    if self.dbdata is not None:
        return self.dbdata
    if not isinstance(self.bsdata, dict):
        raise ValueError("Could not parse LastFM API data")

    tracks = self.bsdata["Tracks"]
    albums = self.bsdata["Albums"]

    if len(tracks) > 0:
        first = tracks[0]
    elif len(albums) > 0:
        first = albums[0]
    else:
        # Nothing to describe: callers receive None rather than an error.
        return None

    artistName = first["artistName"]
    artistURL = first["artistURL"]
    artistMBID = first["artistMBID"]
    artistID = self.dbUtils.getArtistID(artistURL)

    generalData = None
    externalData = {"MusicBrainzID": artistMBID}

    def popularItems(items, minCounts, maxItems=1000):
        # Keep entries above minCounts, then cut at the count held by the
        # maxItems-th most counted entry.
        kept = []
        for item in items:
            counts = int(item["counts"])
            if counts > minCounts:
                kept.append({
                    "Name": item["name"],
                    "URL": item["URL"],
                    "Counts": counts
                })
        if not kept:
            return kept
        ranked = sorted((entry["Counts"] for entry in kept), reverse=True)
        threshold = ranked[min(len(ranked) - 1, maxItems - 1)]
        return [entry for entry in kept if entry["Counts"] >= threshold]

    def toMedia(items):
        # One media record per entry; the code is a stable hash of name+URL.
        records = []
        for item in items:
            digest = md5()
            digest.update(item['Name'].encode('utf-8'))
            digest.update(item['URL'].encode('utf-8'))
            code = str(int(digest.hexdigest(), 16) % int(1e7))
            records.append(
                artistDBMediaDataClass(album=item["Name"],
                                       url=item["URL"],
                                       artist=[artistName],
                                       code=code,
                                       year=None))
        return records

    trackData = popularItems(tracks, 50)
    albumData = popularItems(albums, 25)

    mediaData = {}
    if len(trackData) > 0:
        mediaData["Tracks"] = toMedia(trackData)
    if len(albumData) > 0:
        mediaData["Albums"] = toMedia(albumData)

    artist = artistDBNameClass(name=artistName, err=None)
    meta = artistDBMetaClass(title=None, url=artistURL)
    url = artistDBURLClass(url=artistURL)
    ID = artistDBIDClass(ID=artistID)
    pages = artistDBPageClass(ppp=1, tot=1, redo=False, more=False)
    profile = artistDBProfileClass(general=generalData,
                                   external=externalData)
    media = artistDBMediaClass()
    media.media = mediaData
    mediaCounts = self.getMediaCounts(media)
    info = self.getInfo()

    return artistDBDataClass(artist=artist,
                             meta=meta,
                             url=url,
                             ID=ID,
                             pages=pages,
                             profile=profile,
                             mediaCounts=mediaCounts,
                             media=media,
                             info=info)
def getProfile(self):
    """Return a default-constructed ``artistDBProfileClass``.

    Nothing is read from the parsed data.
    """
    return artistDBProfileClass()
def getProfile(self):
    """Build the artist profile from the ``artist_info`` block.

    ``div.info_hdr`` texts are paired positionally with ``div.info_content``
    tags. Each recognised header is turned into a list of
    ``artistDBLinkClass`` objects (one per anchor in the content) or, when
    the content has no anchors, a single ``artistDBTextClass``.

    * "Formed", "Disbanded", "Members", "Also Known As", "Born",
      "Currently" and "Notes" go into ``general``.
    * "Member of" and "Related Artists" go into ``extra``.
    * "Genres" becomes ``genres``.
    * Links found in ``div.list_name`` under ``ul.lists`` become
      ``external["Lists"]``.

    ``general``, ``extra`` and ``external`` are ``None`` when empty.

    Returns:
        artistDBProfileClass: built with ``err="NoInfo"`` when the
        ``artist_info`` div is missing, otherwise with ``general``,
        ``genres``, ``extra`` and ``external``.
    """
    profile = self.bsdata.find("div", {"class": "artist_info"})
    if profile is None:
        return artistDBProfileClass(err="NoInfo")

    def toValues(tag):
        # Anchors win over plain text when the tag contains any.
        refs = tag.findAll("a")
        if len(refs) == 0:
            return [artistDBTextClass(tag)]
        return [artistDBLinkClass(ref) for ref in refs]

    headers = [
        header.text for header in profile.findAll("div", {"class": "info_hdr"})
    ]
    content = profile.findAll("div", {"class": "info_content"})
    profileData = dict(zip(headers, content))

    rymList = self.bsdata.find("ul", {"class": "lists"})
    listInfos = rymList.findAll(
        "div", {"class": "list_info"}) if rymList is not None else []
    userLists = []
    for userList in listInfos:
        listName = userList.find("div", {"class": "list_name"})
        # Entries without a name div or without an anchor carry no link to
        # record, so they are skipped.
        listRef = listName.find("a") if listName is not None else None
        if listRef is not None:
            userLists.append(artistDBLinkClass(listRef))
    externalData = {"Lists": userLists} if len(userLists) > 0 else None

    generalData = {}
    for key in ("Formed", "Disbanded", "Members", "Also Known As", "Born",
                "Currently", "Notes"):
        tag = profileData.get(key)
        if tag is not None:
            generalData[key] = toValues(tag)

    # A single dict so that "Member of" survives when "Related Artists" is
    # present as well.
    extraData = None
    for key in ("Member of", "Related Artists"):
        tag = profileData.get(key)
        if tag is not None:
            if extraData is None:
                extraData = {}
            extraData[key] = toValues(tag)

    genreData = None
    genreTag = profileData.get("Genres")
    if genreTag is not None:
        genreData = toValues(genreTag)

    generalData = generalData if len(generalData) > 0 else None

    return artistDBProfileClass(general=generalData,
                                genres=genreData,
                                extra=extraData,
                                external=externalData)
def getProfile(self):
    """Build the artist profile from properties, genres, tags, links and tabs.

    * ``general``: one entry per ``dd`` in ``dl.properties`` that has exactly
      one CSS class; that class is the key.
    * ``genres``: links for every anchor in ``div.genre-list``, or ``None``
      when that div is absent.
    * ``tags``: links for every anchor in ``div#sidebar-tag-list``, or
      ``None`` when that div is absent.
    * ``external``: one entry per ``li`` in ``ul.external_links`` that has
      exactly one CSS class; that class is the key.
    * ``extra``: a dict of tab links keyed by their text, built from
      ``div.tabs``; ``None`` when the div is absent or any tab text is empty.

    Values in ``general`` and ``external`` are lists of
    ``artistDBLinkClass`` (one per anchor) or a single-item list holding an
    ``artistDBTextClass`` when the element has no anchors.

    Returns:
        artistDBProfileClass: built with the five fields above.
    """

    def linksOrText(node):
        anchors = node.findAll('a')
        if len(anchors) == 0:
            return [artistDBTextClass(node)]
        return [artistDBLinkClass(anchor) for anchor in anchors]

    def bySingleClass(nodes):
        # Only elements carrying exactly one CSS class contribute an entry.
        collected = {}
        for node in nodes:
            classes = node.attrs.get('class')
            if isinstance(classes, list) and len(classes) == 1:
                collected[classes[0]] = linksOrText(node)
        return collected

    def allLinks(container):
        if container is None:
            return None
        return [artistDBLinkClass(anchor) for anchor in container.findAll("a")]

    # Artist information
    general = {}
    properties = self.bsdata.find("dl", {"class": "properties"})
    if properties is not None:
        general = bySingleClass(properties.findAll("dd"))

    # Genres
    genres = allLinks(self.bsdata.find("div", {"class": "genre-list"}))

    # Tags
    tags = allLinks(self.bsdata.find("div", {"id": "sidebar-tag-list"}))

    # External links
    external = {}
    linkList = self.bsdata.find("ul", {"class": "external_links"})
    if linkList is not None:
        external = bySingleClass(linkList.findAll("li"))

    # Extra: tab links keyed by their text
    extra = None
    tabLinks = allLinks(self.bsdata.find("div", {"class": "tabs"}))
    if tabLinks is not None:
        names = [link.text for link in tabLinks]
        if all(names):
            extra = dict(zip(names, tabLinks))

    return artistDBProfileClass(general=general,
                                tags=tags,
                                genres=genres,
                                extra=extra,
                                external=external)
def getData(self, inputdata):
    """Assemble an artist record from parsed Spotify API data.

    After ``getDataBase``/``checkData`` have run, a non-``None``
    ``self.dbdata`` is returned as is. Otherwise ``self.bsdata`` must be a
    dict with an "Artist" entry (read through ``.name`` and ``.get``) and
    an "Albums" entry whose "albums" list describes each release.

    Albums are grouped under "<album_group> + <album_type>" when both are
    truthy, else under whichever is not ``None``, else under "Unknown".
    The release year is parsed from the album's "date" with
    ``to_datetime``; a date that fails to parse yields ``None``.

    Args:
        inputdata: passed through to ``self.getDataBase``.

    Returns:
        ``self.dbdata`` when set, otherwise an ``artistDBDataClass``.

    Raises:
        ValueError: if ``self.bsdata`` is not a dict, or if the albums'
            "artistID" differs from the artist's ID.
    """
    self.getDataBase(inputdata)
    self.checkData()

    if self.dbdata is not None:
        return self.dbdata
    if not isinstance(self.bsdata, dict):
        raise ValueError("Could not parse Spotify API data")

    artistData = self.bsdata['Artist']
    # NOTE(review): ``.name`` plus ``.get`` suggests a pandas Series row
    # whose index label is the artist ID - confirm against the caller.
    artistID = artistData.name
    artistURI = artistData.get('uri')
    artistType = artistData.get('stype')
    artistPopularity = artistData.get('popularity')
    artistName = artistData.get('name')
    artistAPIURL = artistData.get('href')
    artistGenres = artistData.get('genres', [])
    artistFollowers = artistData.get('followers')
    artistURL = artistData.get('urls', {}).get('spotify')

    generalData = {"Type": artistType}
    genresData = artistGenres if len(artistGenres) > 0 else None
    externalData = {'SpotifyAPI': {"URL": artistAPIURL, "URI": artistURI}}
    extraData = {
        'Followers': artistFollowers,
        "Popularity": artistPopularity
    }

    albumsData = self.bsdata['Albums']
    if len(albumsData) > 0:
        if albumsData.get('artistID') != artistID:
            raise ValueError(
                "ArtistIDs do not match for Spotify API Data! [{0}, {1}]".
                format(albumsData.get('artistID'), artistID))

    mediaData = {}
    for albumData in albumsData.get('albums', []):
        albumID = albumData.get('sid')
        albumGroup = albumData.get('album_group')
        albumType = albumData.get('album_type')
        albumSType = albumData.get('stype')
        albumArtists = [{
            artist['sid']: artist['name']
        } for artist in albumData.get('artists', [])]
        albumURL = albumData.get('urls', {}).get('spotify')
        albumURI = albumData.get('uri')
        albumAPI = albumData.get('href')
        albumName = albumData.get('name')
        albumTracks = albumData.get('numtracks')
        albumDate = albumData.get('date')

        # Best effort: an unparseable date must not abort the whole artist,
        # but interpreter-level exits (KeyboardInterrupt, SystemExit) must
        # still propagate.
        try:
            albumYear = to_datetime(
                albumDate).year if albumDate is not None else None
        except Exception:
            albumYear = None

        if all([albumGroup, albumType]):
            mediaName = " + ".join([albumGroup, albumType])
        elif albumGroup is not None:
            mediaName = albumGroup
        elif albumType is not None:
            mediaName = albumType
        else:
            mediaName = "Unknown"

        amdc = artistDBMediaDataClass(album=albumName,
                                      url=albumURL,
                                      artist=albumArtists,
                                      code=albumID,
                                      year=albumYear,
                                      aclass=albumSType,
                                      aformat={
                                          "URI": albumURI,
                                          "API": albumAPI,
                                          "Date": albumDate,
                                          "NumTracks": albumTracks
                                      })
        if mediaData.get(mediaName) is None:
            mediaData[mediaName] = []
        mediaData[mediaName].append(amdc)

    artist = artistDBNameClass(name=artistName, err=None)
    meta = artistDBMetaClass(title=None, url=artistURL)
    url = artistDBURLClass(url=artistURL)
    ID = artistDBIDClass(ID=artistID)
    pages = artistDBPageClass(ppp=1, tot=1, redo=False, more=False)
    profile = artistDBProfileClass(general=generalData,
                                   external=externalData,
                                   extra=extraData,
                                   genres=genresData)
    media = artistDBMediaClass()
    media.media = mediaData
    mediaCounts = self.getMediaCounts(media)
    info = self.getInfo()

    return artistDBDataClass(artist=artist,
                             meta=meta,
                             url=url,
                             ID=ID,
                             pages=pages,
                             profile=profile,
                             mediaCounts=mediaCounts,
                             media=media,
                             info=info)
def getProfile(self):
    """Build the artist profile from metadata, wiki, similar artists, tags and links.

    * ``general["Metadata"]``: ``dt`` texts paired with the values of the
      matching ``dd`` tags inside ``div.metadata-and-wiki-row``.
    * ``general["Wiki"]``: for every ``div.wiki-block`` under a
      ``div.wiki-column``, the block's text (after its anchors have been
      removed with ``removeTag``) and the links it contained. Links are
      de-duplicated on ``(href, text)``.
    * ``extra``: links to similar artists, read from
      ``ol.catalogue-overview-similar-artists-full-width`` or, failing
      that, ``section.artist-similar-sidebar``.
    * ``tags``: links under ``section.catalogue-tags``.
    * ``external``: links under ``section.external-links-section``.

    Each of these is ``None`` when nothing was found.

    Returns:
        artistDBProfileClass: built with ``general``, ``tags``, ``extra``
        and ``external``.
    """

    def toValues(tag):
        # Anchors win over plain text when the tag contains any.
        anchors = tag.findAll("a")
        if len(anchors) == 0:
            return [artistDBTextClass(tag)]
        return [artistDBLinkClass(anchor) for anchor in anchors]

    def linksOrNone(anchors):
        if isinstance(anchors, list) and len(anchors) > 0:
            return [artistDBLinkClass(anchor) for anchor in anchors]
        return None

    generalData = {}

    ##
    ## General
    ##
    metadata = self.bsdata.find("div", {"class": "metadata-and-wiki-row"})
    if metadata is not None:
        for dl in metadata.findAll("dl"):
            attrKeys = [dt.text for dt in dl.findAll("dt")]
            attrVals = [toValues(dd) for dd in dl.findAll("dd")]
            # NOTE(review): each <dl> overwrites the previous one, so only
            # the last list is kept - confirm this is intended.
            generalData["Metadata"] = dict(zip(attrKeys, attrVals))

    for wikicolumn in self.bsdata.findAll("div", {"class": "wiki-column"}):
        for wikiblock in wikicolumn.findAll("div", {"class": "wiki-block"}):
            refs = wikiblock.findAll("a")
            links = linksOrNone(refs)
            # Strip the anchors so that the text object holds prose only.
            for ref in refs:
                removeTag(wikiblock, ref)
            text = artistDBTextClass(wikiblock)

            if generalData.get("Wiki") is None:
                generalData["Wiki"] = {"Text": [], "Refs": []}
            generalData["Wiki"]["Text"].append(text)
            # Add this block's links once; the de-duplication below makes
            # repeated additions pointless.
            if links is not None:
                generalData["Wiki"]["Refs"] += links

    if generalData.get("Wiki") is not None:
        keep = {(ref.href, ref.text): ref
                for ref in generalData["Wiki"]["Refs"]}
        generalData["Wiki"]["Refs"] = list(keep.values())

    ##
    ## Similar artists
    ##
    similarData = self.bsdata.find(
        "ol", {"class": "catalogue-overview-similar-artists-full-width"})
    if similarData is None:
        similarData = self.bsdata.find("section",
                                       {"class": "artist-similar-sidebar"})
    lis = similarData.findAll("li") if similarData is not None else None
    refs = [li.find("a", {"class": "link-block-target"})
            for li in lis] if lis is not None else None
    extraData = linksOrNone(refs)

    ##
    ## Tags
    ##
    tags = self.bsdata.find("section", {"class": "catalogue-tags"})
    tagsData = linksOrNone(tags.findAll("a") if tags is not None else None)

    ##
    ## External
    ##
    external = self.bsdata.find("section",
                                {"class": "external-links-section"})
    externalData = linksOrNone(
        external.findAll("a") if external is not None else None)

    generalData = generalData if len(generalData) > 0 else None

    return artistDBProfileClass(general=generalData,
                                tags=tagsData,
                                extra=extraData,
                                external=externalData)
def getData(self, inputdata):
    """Assemble an artist record from parsed Deezer API data.

    After ``getDataBase``/``checkData`` have run, a non-``None``
    ``self.dbdata`` is returned as is. Otherwise ``self.bsdata`` must be a
    dict with "Tracks", "Albums", "Name", "ID", "URL" and "Type" entries.
    "Tracks" and "Albums" are mappings from a code to a dict with "Name"
    and "URL"; each entry becomes one ``artistDBMediaDataClass`` filed
    under the "Tracks" or "Albums" media name.

    Args:
        inputdata: passed through to ``self.getDataBase``.

    Returns:
        ``self.dbdata`` when set, otherwise an ``artistDBDataClass``.

    Raises:
        ValueError: if ``self.bsdata`` is not a dict.
    """
    self.getDataBase(inputdata)
    self.checkData()

    if self.dbdata is not None:
        return self.dbdata
    if not isinstance(self.bsdata, dict):
        raise ValueError("Could not parse Deezer API data")

    artistInfo = self.bsdata
    artistTracks = artistInfo["Tracks"]
    artistAlbums = artistInfo["Albums"]
    artistName = artistInfo["Name"]
    artistID = artistInfo["ID"]
    artistURL = artistInfo["URL"]
    generalData = {"Type": artistInfo["Type"]}

    # One dict for both groups, so that the track entries are still there
    # once the albums have been added.
    mediaData = {}
    for mediaName, entries in (("Tracks", artistTracks),
                               ("Albums", artistAlbums)):
        mediaData[mediaName] = []
        for code, entry in entries.items():
            amdc = artistDBMediaDataClass(album=entry["Name"],
                                          url=entry["URL"],
                                          artist=[artistName],
                                          code=code,
                                          year=None)
            mediaData[mediaName].append(amdc)

    artist = artistDBNameClass(name=artistName, err=None)
    meta = artistDBMetaClass(title=None, url=artistURL)
    url = artistDBURLClass(url=artistURL)
    ID = artistDBIDClass(ID=artistID)
    pages = artistDBPageClass(ppp=1, tot=1, redo=False, more=False)
    profile = artistDBProfileClass(general=generalData)
    media = artistDBMediaClass()
    media.media = mediaData
    mediaCounts = self.getMediaCounts(media)
    info = self.getInfo()

    return artistDBDataClass(artist=artist,
                             meta=meta,
                             url=url,
                             ID=ID,
                             pages=pages,
                             profile=profile,
                             mediaCounts=mediaCounts,
                             media=media,
                             info=info)
def getData(self, inputdata):
    """Assemble an artist record from parsed Discogs API data.

    After ``getDataBase``/``checkData`` have run, a non-``None``
    ``self.dbdata`` is returned as is. Otherwise ``self.bsdata`` must be a
    dict with "Artist" and "Albums" entries. The artist ID is read from the
    artist entry's ``.name`` attribute and the artist URL is derived from
    that ID.

    "Albums" may be a list of release dicts, which are grouped by
    ``self.getMediaType(item)``, or a dict, which is used as the media
    mapping unchanged.

    Args:
        inputdata: passed through to ``self.getDataBase``.

    Returns:
        ``self.dbdata`` when set, otherwise an ``artistDBDataClass``.

    Raises:
        ValueError: if ``self.bsdata`` is not a dict, or if "Albums" is
            neither a list nor a dict.
    """
    self.getDataBase(inputdata)
    self.checkData()

    if self.dbdata is not None:
        return self.dbdata
    if not isinstance(self.bsdata, dict):
        raise ValueError("Could not parse Discogs API data")

    artistRow = self.bsdata["Artist"]
    releases = self.bsdata["Albums"]

    artistID = artistRow.name
    artistName = artistRow["name"]
    artistURL = "https://www.discogs.com/artist/{0}".format(artistID)

    # Collect the optional artist attributes, dropping the unset ones.
    general = {}
    for label, field in (("RealName", "realname"),
                         ("Aliases", "MasterAliases"),
                         ("Groups", "MasterGroups"),
                         ("Members", "MasterMembers"),
                         ("Variations", "MasterNameVariations")):
        value = artistRow[field]
        if value is not None:
            general[label] = value
    if len(general) == 0:
        general = None

    ########################################################################
    # Get Releases
    ########################################################################
    if isinstance(releases, list):
        grouped = {}
        for release in releases:
            releaseClass = {
                "Label": release.get('label'),
                "Main": release.get('main_release')
            }
            group = self.getMediaType(release)
            record = artistDBMediaDataClass(album=release.get('name'),
                                            url=release.get('url'),
                                            artist=release.get('artist'),
                                            code=release.get('id'),
                                            aformat=release.get('format'),
                                            aclass=releaseClass,
                                            year=release.get('year'))
            if grouped.get(group) is None:
                grouped[group] = []
            grouped[group].append(record)
    elif isinstance(releases, dict):
        grouped = releases
    else:
        raise ValueError("Not sure how to process albums [{0}]".format(releases))

    media = artistDBMediaClass()
    media.media = grouped

    return artistDBDataClass(
        artist=artistDBNameClass(name=artistName, err=None),
        meta=artistDBMetaClass(title=None, url=artistURL),
        url=artistDBURLClass(url=artistURL),
        ID=artistDBIDClass(ID=artistID),
        pages=artistDBPageClass(ppp=1, tot=1, redo=False, more=False),
        profile=artistDBProfileClass(general=general),
        mediaCounts=self.getMediaCounts(media),
        media=media,
        info=artistDBFileInfoClass(info=None))
def getProfile(self):
    """Build the artist profile from the title meta tag, tabs and basic info.

    * ``extra["Search"]``: the ``content`` attribute of
      ``<meta name="title">`` wrapped in ``artistDBTextClass``.
    * ``extra["Tabs"]``: links under ``ul.tabs``. Each is keyed by the
      link's title, else by the first word of its text, else by
      "Tab <index>". It is omitted when any key is empty.
    * Every ``div`` under ``section.basic-info`` that has exactly one CSS
      class contributes a value: class "genre" becomes ``genres``, class
      "styles" becomes ``tags``, and any other class becomes an entry of
      ``general`` keyed by that class.

    Values are lists of ``artistDBLinkClass`` (one per anchor) or a
    single-item list holding an ``artistDBTextClass`` when the div has no
    anchors. Fields with no data are ``None``.

    Returns:
        artistDBProfileClass: built with ``general``, ``tags``, ``genres``
        and ``extra``.
    """
    generalData = None
    genreData = None
    tagsData = None
    extraData = None

    content = self.bsdata.find("meta", {"name": "title"})
    contentAttr = content.attrs if content is not None else None
    searchTerm = contentAttr.get(
        "content") if contentAttr is not None else None
    searchData = [artistDBTextClass(searchTerm)
                  ] if searchTerm is not None else None

    tabsul = self.bsdata.find("ul", {"class": "tabs"})
    refs = tabsul.findAll("a") if tabsul is not None else None
    tabLinks = [artistDBLinkClass(ref)
                for ref in refs] if refs is not None else None

    tabsData = None
    if isinstance(tabLinks, list):
        tabKeys = []
        for i, tabLink in enumerate(tabLinks):
            keyTitle = tabLink.title
            keyText = tabLink.text
            if keyTitle is not None:
                tabKeys.append(keyTitle)
                continue
            # Text that is empty or whitespace-only has no first word; fall
            # back to the positional name in that case.
            words = keyText.replace("\n", "").split() \
                if keyText is not None else []
            if words:
                tabKeys.append(words[0])
            else:
                tabKeys.append("Tab {0}".format(i))
        if all(tabKeys):
            tabsData = dict(zip(tabKeys, tabLinks))

    if searchData is not None:
        if extraData is None:
            extraData = {}
        extraData["Search"] = searchData

    if tabsData is not None:
        if extraData is None:
            extraData = {}
        extraData["Tabs"] = tabsData

    basicInfo = self.bsdata.find("section", {"class": "basic-info"})
    if basicInfo is not None:
        for div in basicInfo.findAll("div"):
            attrs = div.attrs.get('class')
            if not (isinstance(attrs, list) and len(attrs) == 1):
                continue
            attrKey = attrs[0]

            refs = div.findAll("a")
            val = [artistDBTextClass(div)] if len(refs) == 0 else [
                artistDBLinkClass(ref) for ref in refs
            ]

            if attrKey == "genre":
                genreData = val
            elif attrKey == "styles":
                tagsData = val
            else:
                if generalData is None:
                    generalData = {}
                generalData[attrKey] = val

    return artistDBProfileClass(general=generalData,
                                tags=tagsData,
                                genres=genreData,
                                extra=extraData)