def getMedia(self, artist):
    """Collect the singles and the albums into one media container.

    Args:
        artist: Not used by this method; kept so the signature is unchanged.

    Returns:
        An ``artistDBMediaClass`` whose ``media`` maps ``"Singles"`` to the
        result of ``getMediaSingles()`` and ``"Albums"`` to the result of
        ``getMediaAlbums()``.
    """
    amc = artistDBMediaClass()
    # Singles are loaded before albums, which is also the key insertion order.
    loaders = (
        ("Singles", self.getMediaSingles),
        ("Albums", self.getMediaAlbums),
    )
    for mediaType, loader in loaders:
        amc.media[mediaType] = loader()
    return amc
def getMedia(self):
    """Parse every ``<h3>``/``<table>`` pair of the page into media entries.

    The text of the n-th ``<h3>`` is used as the media type for the rows of
    the n-th ``<table>``.  The first row of each table is skipped as the
    header row, and the columns are located through the "Year", "Title" and
    "Artist" header cells.

    Returns:
        An ``artistDBMediaClass`` whose ``media`` maps each media type to a
        list of ``artistDBMediaDataClass`` entries.

    Raises:
        ValueError: If a table with data rows lacks one of the three headers
            (raised by ``list.index``) or a title cell holds no link.
    """
    amc = artistDBMediaClass()

    mediaTypes = [x.text for x in self.bsdata.findAll("h3")]
    tables = dict(zip(mediaTypes, self.bsdata.findAll("table")))
    for mediaType, table in tables.items():
        rows = table.findAll('tr')[1:]
        if not rows:
            # Nothing to parse, so the headers are never needed.
            continue

        # The column positions are the same for every row of a table.
        headers = [x.text for x in table.findAll("th")]
        yearIdx = headers.index("Year")
        titleIdx = headers.index("Title")
        artistIdx = headers.index("Artist")

        for tr in rows:
            tds = tr.findAll("td")

            ## Year
            year = tds[yearIdx].text

            ## Title
            titleCell = tds[titleIdx]
            refs = [x.attrs['href'] for x in titleCell.findAll('a')]
            if len(refs) == 0:
                raise ValueError("No link for album")
            albumURL = refs[0]
            album = titleCell.text

            ## Code: md5 over the last URL path segment with the "-" removed
            m = md5()
            uuid = albumURL.split("/")[-1]
            for val in uuid.split("-"):
                m.update(val.encode('utf-8'))
            code = int(m.hexdigest(), 16)

            ## Artist
            # Kept in their own names: the previous version reused ``url``
            # here, so the entry was stored with the last artist's link
            # instead of the album link.
            artists = [{"name": artistRef.text, "url": artistRef.attrs['href']}
                       for artistRef in tds[artistIdx].findAll('a')]

            amdc = artistDBMediaDataClass(album=album,
                                          url=albumURL,
                                          aclass=None,
                                          aformat=None,
                                          artist=artists,
                                          code=code,
                                          year=year)
            if amc.media.get(mediaType) is None:
                amc.media[mediaType] = []
            amc.media[mediaType].append(amdc)

    return amc
def getMedia(self):
    """Parse the first table of the page into a list of "Singles" entries.

    Header cell texts are used as keys for the cells of every following row.
    Rows without a "Track" cell are skipped.  The year is the first four
    characters of the "Peak Date" cell when that column exists.

    Returns:
        An ``artistDBMediaClass`` whose ``media["Singles"]`` is a (possibly
        empty) list of ``artistDBMediaDataClass`` entries.
    """
    amc = artistDBMediaClass()
    mediaType = "Singles"
    amc.media[mediaType] = []

    table = self.bsdata.find("table")
    if table is None:
        return amc

    ths = [th.text for th in table.findAll("th")]
    for tr in table.findAll("tr")[1:]:
        trackData = dict(zip(ths, tr.findAll("td")))

        trackCell = trackData.get("Track")
        if trackCell is None:
            continue

        peakCell = trackData.get("Peak Date")
        trackYear = peakCell.text[:4] if peakCell is not None else None

        trackRef = trackCell.find("a")
        trackURL = trackRef.attrs['href'] if trackRef is not None else None
        trackName = trackCell.text

        # Featured artists.  Each element is already an <a>, so its own text
        # and href are used.  The previous version searched for a nested <a>
        # (always None) and stored the track title as the artist name.
        trackArtists = []
        withCell = trackData.get("With")
        if withCell is not None:
            for artistRef in withCell.findAll("a"):
                trackArtists.append({
                    "Artist": artistRef.text,
                    "URL": artistRef.attrs.get('href')
                })

        code = self.dbUtils.getAlbumCode(name=trackName, url=trackURL)
        amdc = artistDBMediaDataClass(album=trackName,
                                      url=trackURL,
                                      aclass=None,
                                      aformat=None,
                                      artist=trackArtists,
                                      code=code,
                                      year=trackYear)
        amc.media[mediaType].append(amdc)

    return amc
def getMedia(self, artist):
    """Build media entries from every ``div.albumBlock`` on the page.

    Inside a block, each ``<div>`` is stored under its first CSS class name
    (``None`` when the class attribute is not a list).  A div that contains a
    link is wrapped in ``artistDBLinkClass``, any other div in
    ``artistDBTextClass``.  The entry fields are then read from the
    "image" (URL), "albumTitle", "date" and "type" divs.

    Args:
        artist: Not used by this method.

    Returns:
        An ``artistDBMediaClass`` whose ``media`` maps each media type text
        (or ``None``) to a list of ``artistDBMediaDataClass`` entries.
    """

    def _firstClass(div):
        # First CSS class of the div, or None when there is no class list.
        classes = div.attrs.get("class")
        return classes[0] if isinstance(classes, list) else None

    def _wrap(div):
        # A div with a link is represented by the link, otherwise by its text.
        link = div.find("a")
        if link is None:
            return artistDBTextClass(div)
        return artistDBLinkClass(link)

    def _attrOf(value, wrapper, attrName):
        # Read the attribute only when the value has the expected wrapper type.
        if isinstance(value, wrapper):
            return getattr(value, attrName)
        return None

    amc = artistDBMediaClass()

    for albumBlock in self.bsdata.findAll("div", {"class": "albumBlock"}):
        blockData = {}
        for div in albumBlock.findAll("div"):
            key = _firstClass(div)
            blockData[key] = _wrap(div)

        url = _attrOf(blockData.get("image"), artistDBLinkClass, "href")
        title = _attrOf(blockData.get("albumTitle"), artistDBTextClass, "text")
        year = _attrOf(blockData.get("date"), artistDBTextClass, "text")
        mediaType = _attrOf(blockData.get("type"), artistDBTextClass, "text")

        code = self.dutils.getAlbumCode(name=title, url=url)

        # NOTE(review): the artist is hard-coded to "U2" and the ``artist``
        # argument is ignored -- confirm whether that is intended.
        amdc = artistDBMediaDataClass(album=title,
                                      url=url,
                                      aclass=None,
                                      aformat=None,
                                      artist="U2",
                                      code=code,
                                      year=year)
        if amc.media.get(mediaType) is None:
            amc.media[mediaType] = []
        amc.media[mediaType].append(amdc)
        if self.debug:
            print("\t\tAdding Media ({0} -- {1})".format(title, url))

    return amc
def getMedia(self, artist):
    """Build media entries from every ``div.wrap`` found in a table cell.

    A cell text starting with "Album: " becomes an "Album" entry titled with
    the remainder of the text; any other cell text becomes a "Single" entry.
    The "Albums" key is always created and is left empty by this method.

    Args:
        artist: Stored unchanged as the ``artist`` of every entry.

    Returns:
        An ``artistDBMediaClass`` holding the collected entries.
    """
    amc = artistDBMediaClass()
    amc.media["Albums"] = []

    albumPrefix = "Album: "
    for table in self.bsdata.findAll("table"):
        for tr in table.findAll("tr"):
            for td in tr.findAll("td"):
                div = td.find("div", {"class": "wrap"})
                if div is None:
                    continue

                name = div.text
                isAlbum = name.startswith(albumPrefix)
                mediaType = "Album" if isAlbum else "Single"
                title = name[len(albumPrefix):] if isAlbum else name

                if amc.media.get(mediaType) is None:
                    amc.media[mediaType] = []

                # No URL is available here; the media type is passed in its place.
                code = self.dbUtils.getAlbumCode(name=title, url=mediaType)
                amdc = artistDBMediaDataClass(album=title,
                                              url=None,
                                              aclass=None,
                                              aformat=None,
                                              artist=artist,
                                              code=code,
                                              year=None)
                amc.media[mediaType].append(amdc)

    return amc
def getMedia(self, artist):
    """Load media from the page's ``<script data-name="initial-state">`` JSON.

    The first child of the script tag is parsed as JSON and handed to
    ``getArtistMediaData``; the entries it returns per media type are
    appended to the result.  When the script tag is missing or cannot be
    parsed, a message is printed and only the empty "Albums" list is returned.

    Args:
        artist: Not used by this method.

    Returns:
        An ``artistDBMediaClass`` holding the collected entries.
    """
    amc = artistDBMediaClass()
    name = "Albums"
    amc.media[name] = []

    script = self.bsdata.find("script", {"data-name": "initial-state"})
    if script is None:
        print("No media data")
        return amc

    try:
        jdata = json.loads(script.contents[0])
    except (IndexError, TypeError, ValueError):
        # IndexError: empty tag; TypeError: first child is not text;
        # ValueError: malformed JSON.  A bare except would also have
        # swallowed KeyboardInterrupt and SystemExit.
        print("Could not load JSON data")
        return amc

    mediaData = self.getArtistMediaData(jdata)
    for mediaType, mediaTypeData in mediaData.items():
        if amc.media.get(mediaType) is None:
            amc.media[mediaType] = []
        amc.media[mediaType].extend(mediaTypeData)

    return amc
def getMedia(self, artist, url):
    """Parse the ``div#discography`` sections of the page into media entries.

    Category names come from the ``h3.disco_header_label`` headings.  Each
    one is paired with the last character of the id of the ``<span>`` inside
    the matching ``div.disco_showing``; that character selects the
    ``div#disco_type_<suffix>`` holding the category's ``div.disco_release``
    items.  The results of ``getClassicalMedia`` and ``getCreditsMedia`` are
    merged in afterwards.

    Args:
        artist: Object whose ``name`` is used when a release lists no artist.
        url: Object whose ``url`` string is used (with the
            "https://rateyourmusic.com" prefix removed) when a release lists
            no artist.

    Returns:
        An ``artistDBMediaClass`` holding the collected entries.
    """
    amc = artistDBMediaClass()

    mediadatas = self.bsdata.findAll("div", {"id": "discography"})
    for mediadata in mediadatas:
        h3s = mediadata.findAll("h3", {"class": "disco_header_label"})
        categories = [x.text for x in h3s]

        sufs = mediadata.findAll("div", {"class": "disco_showing"})
        spans = [x.find("span") for x in sufs]
        ids = [x.attrs['id'] for x in spans]
        letters = [x[-1] for x in ids]

        for mediaType, suffix in dict(zip(categories, letters)).items():
            categorydata = mediadata.find(
                "div", {"id": "disco_type_{0}".format(suffix)})
            albumdatas = categorydata.findAll("div", {"class": "disco_release"})
            for albumdata in albumdatas:
                ## Code: trailing "_"-separated part of the id, kept only
                ## when it is numeric
                codedata = albumdata.attrs['id']
                code = codedata.split("_")[-1]
                try:
                    int(code)
                except ValueError:
                    code = None

                ## Title
                mainline = albumdata.find("div", {"class": "disco_mainline"})
                maindata = self.getNamesAndURLs(mainline)
                try:
                    first = maindata[0]
                except (IndexError, KeyError, TypeError):
                    first = None
                album = getattr(first, "name", None)
                albumurl = getattr(first, "url", None)

                ## Year
                yeardata = albumdata.find("span", {"class": "disco_year_y"})
                if yeardata is None:
                    yeardata = albumdata.find("span",
                                              {"class": "disco_year_ymd"})
                year = yeardata.text if yeardata is not None else None

                ## Artists (fall back to the page's own artist)
                artistdata = albumdata.findAll("span")[-1]
                albumartists = self.getNamesAndURLs(artistdata)
                if len(albumartists) == 0:
                    albumartists = [
                        artistDBURLInfo(name=artist.name,
                                        url=url.url.replace(
                                            "https://rateyourmusic.com", ""),
                                        ID=None)
                    ]

                # The entry URL is the release link; the previous version
                # passed the album title here and never used ``albumurl``.
                amdc = artistDBMediaDataClass(album=album,
                                              url=albumurl,
                                              aclass=None,
                                              aformat=None,
                                              artist=albumartists,
                                              code=code,
                                              year=year)
                if amc.media.get(mediaType) is None:
                    amc.media[mediaType] = []
                amc.media[mediaType].append(amdc)

    classicalMedia = self.getClassicalMedia(artist, url)
    if len(classicalMedia) > 0:
        amc.media.update(classicalMedia)

    creditsMedia = self.getCreditsMedia(artist, url)
    if len(creditsMedia) > 0:
        amc.media.update(creditsMedia)

    return amc
def getData(self, inputdata):
    """Convert stored Discogs API data into an ``artistDBDataClass``.

    ``getDataBase`` and ``checkData`` are run first.  If they leave
    ``self.dbdata`` set, it is returned as is; otherwise ``self.bsdata`` must
    be a dict with "Artist" and "Albums" entries.

    Args:
        inputdata: Passed through to ``getDataBase``.

    Returns:
        ``self.dbdata`` when it is set, else a new ``artistDBDataClass``.

    Raises:
        ValueError: If ``self.bsdata`` is not a dict, or "Albums" is neither
            a list nor a dict.
    """
    self.getDataBase(inputdata)
    self.checkData()

    if self.dbdata is not None:
        return self.dbdata
    if not isinstance(self.bsdata, dict):
        raise ValueError("Could not parse Discogs API data")

    artistData = self.bsdata["Artist"]
    albumsData = self.bsdata["Albums"]

    # NOTE(review): ``.name`` next to ``["name"]`` suggests a pandas Series
    # whose index label is the artist ID -- confirm against the caller.
    artistID = artistData.name
    artistName = artistData["name"]
    artistURL = "https://www.discogs.com/artist/{0}".format(artistID)

    # Profile fields: output label -> source key; None values are dropped.
    profileFields = (
        ("RealName", "realname"),
        ("Aliases", "MasterAliases"),
        ("Groups", "MasterGroups"),
        ("Members", "MasterMembers"),
        ("Variations", "MasterNameVariations"),
    )
    generalData = {label: artistData[source] for label, source in profileFields}
    generalData = {k: v for k, v in generalData.items() if v is not None}
    if len(generalData) == 0:
        generalData = None

    ########################################################################
    # Get Releases
    ########################################################################
    if isinstance(albumsData, dict):
        # Already grouped by media type.
        mediaData = albumsData
    elif isinstance(albumsData, list):
        mediaData = {}
        for item in albumsData:
            mediaName = self.getMediaType(item)
            amdc = artistDBMediaDataClass(album=item.get('name'),
                                          url=item.get('url'),
                                          artist=item.get('artist'),
                                          code=item.get('id'),
                                          aformat=item.get('format'),
                                          aclass={
                                              "Label": item.get('label'),
                                              "Main": item.get('main_release')
                                          },
                                          year=item.get('year'))
            mediaData.setdefault(mediaName, []).append(amdc)
    else:
        raise ValueError(
            "Not sure how to process albums [{0}]".format(albumsData))

    media = artistDBMediaClass()
    media.media = mediaData

    return artistDBDataClass(
        artist=artistDBNameClass(name=artistName, err=None),
        meta=artistDBMetaClass(title=None, url=artistURL),
        url=artistDBURLClass(url=artistURL),
        ID=artistDBIDClass(ID=artistID),
        pages=artistDBPageClass(ppp=1, tot=1, redo=False, more=False),
        profile=artistDBProfileClass(general=generalData),
        mediaCounts=self.getMediaCounts(media),
        media=media,
        info=artistDBFileInfoClass(info=None))
def getData(self, inputdata):
    """Convert stored Spotify API data into an ``artistDBDataClass``.

    ``getDataBase`` and ``checkData`` are run first.  If they leave
    ``self.dbdata`` set, it is returned as is; otherwise ``self.bsdata`` must
    be a dict with "Artist" and "Albums" entries.  Albums are grouped under
    "<album_group> + <album_type>", under whichever of the two is set, or
    under "Unknown".

    Args:
        inputdata: Passed through to ``getDataBase``.

    Returns:
        ``self.dbdata`` when it is set, else a new ``artistDBDataClass``.

    Raises:
        ValueError: If ``self.bsdata`` is not a dict, or the albums data
            names a different artist ID than the artist data.
    """
    self.getDataBase(inputdata)
    self.checkData()

    if self.dbdata is not None:
        return self.dbdata
    if not isinstance(self.bsdata, dict):
        raise ValueError("Could not parse Spotify API data")

    artistData = self.bsdata['Artist']

    # NOTE(review): ``.name`` next to ``.get(...)`` suggests a pandas Series
    # whose index label is the artist ID -- confirm against the caller.
    artistID = artistData.name
    artistURI = artistData.get('uri')
    artistType = artistData.get('stype')
    artistPopularity = artistData.get('popularity')
    artistName = artistData.get('name')
    artistAPIURL = artistData.get('href')
    artistGenres = artistData.get('genres', [])
    artistFollowers = artistData.get('followers')
    artistURL = artistData.get('urls', {}).get('spotify')

    generalData = {"Type": artistType}
    genresData = artistGenres if len(artistGenres) > 0 else None
    externalData = {'SpotifyAPI': {"URL": artistAPIURL, "URI": artistURI}}
    extraData = {
        'Followers': artistFollowers,
        "Popularity": artistPopularity
    }

    mediaData = {}
    albumsData = self.bsdata['Albums']
    if len(albumsData) > 0:
        if albumsData.get('artistID') != artistID:
            raise ValueError(
                "ArtistIDs do not match for Spotify API Data! [{0}, {1}]".
                format(albumsData.get('artistID'), artistID))

        for albumData in albumsData.get('albums', []):
            albumID = albumData.get('sid')
            albumGroup = albumData.get('album_group')
            albumType = albumData.get('album_type')
            albumSType = albumData.get('stype')
            albumArtists = [{
                artist['sid']: artist['name']
            } for artist in albumData.get('artists', [])]
            albumURL = albumData.get('urls', {}).get('spotify')
            albumURI = albumData.get('uri')
            albumAPI = albumData.get('href')
            albumName = albumData.get('name')
            albumTracks = albumData.get('numtracks')
            albumDate = albumData.get('date')

            # The year is best effort: an unparsable date yields None.
            albumYear = None
            if albumDate is not None:
                try:
                    albumYear = to_datetime(albumDate).year
                except (AttributeError, OverflowError, TypeError, ValueError):
                    albumYear = None

            if all([albumGroup, albumType]):
                mediaName = " + ".join([albumGroup, albumType])
            elif albumGroup is not None:
                mediaName = albumGroup
            elif albumType is not None:
                mediaName = albumType
            else:
                mediaName = "Unknown"

            amdc = artistDBMediaDataClass(album=albumName,
                                          url=albumURL,
                                          artist=albumArtists,
                                          code=albumID,
                                          year=albumYear,
                                          aclass=albumSType,
                                          aformat={
                                              "URI": albumURI,
                                              "API": albumAPI,
                                              "Date": albumDate,
                                              "NumTracks": albumTracks
                                          })
            if mediaData.get(mediaName) is None:
                mediaData[mediaName] = []
            mediaData[mediaName].append(amdc)

    artist = artistDBNameClass(name=artistName, err=None)
    meta = artistDBMetaClass(title=None, url=artistURL)
    url = artistDBURLClass(url=artistURL)
    ID = artistDBIDClass(ID=artistID)
    pages = artistDBPageClass(ppp=1, tot=1, redo=False, more=False)
    profile = artistDBProfileClass(general=generalData,
                                   external=externalData,
                                   extra=extraData,
                                   genres=genresData)
    media = artistDBMediaClass()
    media.media = mediaData
    mediaCounts = self.getMediaCounts(media)
    info = self.getInfo()

    adc = artistDBDataClass(artist=artist,
                            meta=meta,
                            url=url,
                            ID=ID,
                            pages=pages,
                            profile=profile,
                            mediaCounts=mediaCounts,
                            media=media,
                            info=info)

    return adc
def getMedia(self, artist):
    """Build album and single entries from JSON embedded in a ``<meta>`` tag.

    The first ``<meta>`` whose ``content`` attribute starts with "{", ends
    with "}" and parses as JSON is used.  Its "artist_albums" items become
    "Albums" entries and its "artist_songs" items become "Singles" entries.

    Args:
        artist: Not used by this method; the artist name is read from the
            JSON ``artist.name`` field.

    Returns:
        An ``artistDBMediaClass`` holding the collected entries.  "Albums" is
        always present; "Singles" only when at least one song was found.
    """
    amc = artistDBMediaClass()
    amc.media["Albums"] = []

    jdata = None
    for meta in self.bsdata.findAll("meta"):
        # Many <meta> tags (e.g. charset) carry no content attribute; the
        # previous ``attrs['content']`` lookup raised KeyError on those.
        content = meta.attrs.get('content')
        if not isinstance(content, str):
            continue
        if content.startswith("{") and content.endswith("}"):
            try:
                jdata = json.loads(content)
            except ValueError:
                continue
            break

    if jdata is None:
        return amc

    try:
        artistName = jdata['artist']['name']
    except (KeyError, TypeError):
        artistName = None

    mediaType = "Albums"
    if jdata.get('artist_albums') is not None:
        for albumData in jdata['artist_albums']:
            albumName = albumData['name']
            albumID = albumData['id']
            try:
                albumYear = albumData['release_date_components']['year']
            except (KeyError, TypeError):
                # Key absent, or the components value is not subscriptable.
                albumYear = None

            if amc.media.get(mediaType) is None:
                amc.media[mediaType] = []
            amdc = artistDBMediaDataClass(album=albumName,
                                          url=None,
                                          aclass=None,
                                          aformat=None,
                                          artist=[artistName],
                                          code=albumID,
                                          year=albumYear)
            amc.media[mediaType].append(amdc)

    mediaType = "Singles"
    if jdata.get('artist_songs') is not None:
        for songData in jdata['artist_songs']:
            songName = songData['title']
            songID = songData['id']

            if amc.media.get(mediaType) is None:
                amc.media[mediaType] = []
            amdc = artistDBMediaDataClass(album=songName,
                                          url=None,
                                          aclass=None,
                                          aformat=None,
                                          artist=[artistName],
                                          code=songID,
                                          year=None)
            amc.media[mediaType].append(amdc)

    return amc
def getMedia(self, artist):
    """Parse ``section#artist-albums-section`` into a list of "Albums" entries.

    Every ``li.resource-list--release-list-item-wrap`` inside an
    ``ol.buffer-standard`` is one album.  Its name and URL come from the
    first link of the item's ``<h3>``.  The year is parsed from the second
    auxiliary-text paragraph when that paragraph splits into exactly five
    lines.

    Args:
        artist: Object whose ``name`` is stored as the only artist of every
            entry.

    Returns:
        An ``artistDBMediaClass`` whose ``media["Albums"]`` is a (possibly
        empty) list of ``artistDBMediaDataClass`` entries.
    """
    amc = artistDBMediaClass()
    mediaType = "Albums"
    amc.media[mediaType] = []

    albumsection = self.bsdata.find("section", {"id": "artist-albums-section"})
    if albumsection is None:
        if self.debug:
            print("\t\tNo Album Section!")
        return amc

    ols = albumsection.findAll("ol", {"class": "buffer-standard"})
    if self.debug:
        print("\t\tFound {0} Resource Lists".format(len(ols)))

    for ol in ols:
        lis = ol.findAll(
            "li", {"class": "resource-list--release-list-item-wrap"})
        for il, li in enumerate(lis):
            h3 = li.find(
                "h3", {"class": "resource-list--release-list-item-name"})
            if h3 is None:
                if self.debug:
                    print(
                        "\t\tNo <h3> in artist list section ({0}/{1}): {2}"
                        .format(il, len(lis), li))
                continue

            linkdata = self.getNamesAndURLs(h3)
            if len(linkdata) == 0:
                continue

            ## Name
            album = linkdata[0].name

            ## URL
            url = linkdata[0].url

            ## Code
            code = self.dutils.getArtistID(album)

            ## Year
            year = None
            # The filter must be an attribute dict; the previous set literal
            # ({"class", "..."}) only worked because bs4 treats a non-dict
            # filter as a list of acceptable class names.
            codedatas = li.findAll(
                "p", {"class": "resource-list--release-list-item-aux-text"})
            if len(codedatas) == 2:
                codedata = codedatas[1].text
                vals = [x.strip() for x in codedata.split("\n")]
                if len(vals) == 5:
                    try:
                        # Drop the last two characters of the third line and
                        # read its final whitespace-separated token.
                        year = int(vals[2][:-2].split()[-1])
                    except (IndexError, ValueError):
                        year = None

            amdc = artistDBMediaDataClass(album=album,
                                          url=url,
                                          aclass=None,
                                          aformat=None,
                                          artist=[artist.name],
                                          code=code,
                                          year=year)
            amc.media[mediaType].append(amdc)
            if self.debug:
                print("\t\tAdding Media ({0} -- {1})".format(album, url))

    return amc
def getMedia(self):
    """Parse every table of the page into a list of "Videos" entries.

    The first cell of each non-header row is read as "<artist> - <track>".
    Known video-type suffixes are removed from the track name, repeated
    spaces are collapsed, and rows whose track name ends up empty are
    skipped.

    Returns:
        An ``artistDBMediaClass`` whose ``media["Videos"]`` is a (possibly
        empty) list of ``artistDBMediaDataClass`` entries.
    """
    amc = artistDBMediaClass()
    mediaType = "Videos"
    amc.media[mediaType] = []

    # Removed in this order, so the longer "(Official Video (Short Version))"
    # is handled before its "(Official Video)" prefix.
    removes = [
        "(Official Music Video)", "(Official Lyric Video)",
        "(Official Video (Short Version))", "(Official Video)",
        "[Lyric Video]", "(Video Version)", "[Official Music Video]",
        "(Official Audio)", "(Shazam Version)", "(Explicit)",
        "(Dance Video)", "(Lyric Video)", "[Official Video]",
        "(Official Dance Video)", '(Acoustic)', '(Audio)',
        '(Visualizer)', '(Video Commentary)', '(VEVO Footnotes)',
        '(Choir Version)', '(Fan Lip Sync Version)', '(Trailer)',
        '(Teaser)'
    ]

    for table in self.bsdata.findAll("table"):
        for tr in table.findAll("tr")[1:]:
            td = tr.find('td')
            if td is None:
                # e.g. an extra header row that holds only <th> cells
                continue

            name = td.text

            # e.g. https://kworb.net/youtube/video/fRh_vgS2dFE.html
            ref = td.find("a")
            trackURL = None
            if ref is not None:
                url = ref.attrs['href']
                trackURL = "https://kworb.net/youtube/video/{0}.html".format(
                    getBaseFilename(url))

            songData = name.split(' - ')
            artistName = songData[0]
            trackName = " - ".join(songData[1:])

            for rmText in removes:
                trackName = trackName.replace(rmText, "").strip()

            # Collapse the runs of spaces left behind by the removals
            # (double space -> single space, until none remain).
            while "  " in trackName:
                trackName = trackName.replace("  ", " ")

            if len(trackName.strip()) == 0:
                continue

            code = self.dbUtils.getAlbumCode(name=trackName, url=trackURL)
            amdc = artistDBMediaDataClass(album=trackName,
                                          url=trackURL,
                                          aclass=None,
                                          aformat=None,
                                          artist=artistName,
                                          code=code,
                                          year=None)
            amc.media[mediaType].append(amdc)

    return amc
def getData(self, inputdata):
    """Convert stored Deezer API data into an ``artistDBDataClass``.

    ``getDataBase`` and ``checkData`` are run first.  If they leave
    ``self.dbdata`` set, it is returned as is; otherwise ``self.bsdata`` must
    be a dict with "Tracks", "Albums", "Name", "ID", "URL" and "Type"
    entries.  "Tracks" and "Albums" are read as mappings of code to a record
    holding "Name" and "URL".

    Args:
        inputdata: Passed through to ``getDataBase``.

    Returns:
        ``self.dbdata`` when it is set, else a new ``artistDBDataClass``
        whose media holds a "Tracks" and an "Albums" list.

    Raises:
        ValueError: If ``self.bsdata`` is not a dict.
    """
    self.getDataBase(inputdata)
    self.checkData()

    if self.dbdata is not None:
        return self.dbdata
    if not isinstance(self.bsdata, dict):
        raise ValueError("Could not parse Deezer API data")

    artistRecord = self.bsdata
    artistTracks = artistRecord["Tracks"]
    artistAlbums = artistRecord["Albums"]
    artistName = artistRecord["Name"]
    artistID = artistRecord["ID"]
    artistURL = artistRecord["URL"]
    generalData = {"Type": artistRecord["Type"]}

    # Both groups are kept.  The previous version reset ``mediaData`` between
    # the two loops, which silently discarded every track entry.
    mediaData = {}
    for mediaName, records in (("Tracks", artistTracks),
                               ("Albums", artistAlbums)):
        mediaData[mediaName] = []
        for code, record in records.items():
            amdc = artistDBMediaDataClass(album=record["Name"],
                                          url=record["URL"],
                                          artist=[artistName],
                                          code=code,
                                          year=None)
            mediaData[mediaName].append(amdc)

    artist = artistDBNameClass(name=artistName, err=None)
    meta = artistDBMetaClass(title=None, url=artistURL)
    url = artistDBURLClass(url=artistURL)
    ID = artistDBIDClass(ID=artistID)
    pages = artistDBPageClass(ppp=1, tot=1, redo=False, more=False)
    profile = artistDBProfileClass(general=generalData)
    media = artistDBMediaClass()
    media.media = mediaData
    mediaCounts = self.getMediaCounts(media)
    info = self.getInfo()

    adc = artistDBDataClass(artist=artist,
                            meta=meta,
                            url=url,
                            ID=ID,
                            pages=pages,
                            profile=profile,
                            mediaCounts=mediaCounts,
                            media=media,
                            info=info)

    return adc
def getMedia(self):
    """Parse the ``table#artist`` rows into media entries grouped by section.

    A row containing an ``<h3>`` starts a new section named after the heading
    text.  Every other row with a ``td.title`` cell is turned into an entry
    of the current section.  Rows seen before any heading go to "Albums".

    Returns:
        An ``artistDBMediaClass`` holding the collected entries.  When the
        table is missing, its ``err`` is set to "NoMedia" and no media is
        added.
    """
    amc = artistDBMediaClass()

    table = self.bsdata.find("table", {"id": "artist"})
    if table is None:
        amc.err = "NoMedia"
        return amc

    name = None
    for tr in table.findAll("tr"):
        # Section heading row
        h3 = tr.find("h3")
        if h3 is not None:
            name = h3.text
            amc.media[name] = []
            continue

        # Album, URL, Format
        result = tr.find("td", {"class": "title"})
        if result is None:
            continue
        retval = self.getMediaAlbum(result)
        album = fixName(retval.album)
        url = retval.url
        albumformat = retval.aformat
        if album is None:
            continue

        # Code
        code = tr.attrs.get("data-object-id")

        # AlbumClass
        albumclass = tr.attrs.get("data-object-type")

        # Artists
        result = tr.find("td", {"class": "artist"})
        artists = None
        if result is not None:
            artists = self.getNamesAndURLs(result)

        # Year
        result = tr.find("td", {"class": "year"})
        year = None
        if result is not None:
            year = result.text

        # Entries that appear before any heading fall under "Albums".
        if name is None:
            name = "Albums"
            amc.media[name] = []

        amdc = artistDBMediaDataClass(album=album,
                                      url=url,
                                      aclass=albumclass,
                                      aformat=albumformat,
                                      artist=artists,
                                      code=code,
                                      year=year)
        amc.media[name].append(amdc)

    return amc
def getMedia(self, artist):
    """Parse the album and song listings of the page into media entries.

    "Albums" come from the ``<article>`` elements of
    ``section#album-artist`` and "Songs" from the ``<li>`` elements of
    ``ol#songs-list``.  Either part of the page may be absent, in which case
    its list stays empty.  Codes are derived from the md5 of the entry URL.

    Args:
        artist: Object whose ``name`` is stored as the only artist of every
            entry.

    Returns:
        An ``artistDBMediaClass`` with an "Albums" and a "Songs" list.

    Raises:
        ValueError: If an album or song element lacks the expected link,
            caption, bold or italic element.
    """
    amc = artistDBMediaClass()

    def _urlCode(url, modulus):
        # md5 over the "/"-separated URL parts, reduced modulo ``modulus``.
        digest = md5()
        for part in url.split("/"):
            digest.update(part.encode('utf-8'))
        return str(int(digest.hexdigest(), 16) % modulus)

    def _store(mediaType, albumName, albumURL, albumYear, code):
        # Append one entry to its media type list, creating the list if needed.
        amdc = artistDBMediaDataClass(album=albumName,
                                      url=albumURL,
                                      aclass=None,
                                      aformat=None,
                                      artist=[artist.name],
                                      code=code,
                                      year=albumYear)
        if amc.media.get(mediaType) is None:
            amc.media[mediaType] = []
        amc.media[mediaType].append(amdc)

    ## Albums
    amc.media["Albums"] = []
    artistSection = self.bsdata.find("section", {"id": "album-artist"})
    articles = [] if artistSection is None else artistSection.findAll("article")
    for article in articles:
        ref = article.find('a')
        if ref is None:
            raise ValueError("No ref in article")
        albumURL = ref.attrs['href']

        caption = ref.find("figcaption")
        if caption is None:
            raise ValueError("No figcaption in article")

        bold = caption.find("b")
        if bold is None:
            raise ValueError("No bold in caption")

        italic = caption.find("i")
        if italic is None:
            raise ValueError("No italics in caption")

        _store("Albums", bold.text, albumURL, italic.text,
               _urlCode(albumURL, int(1e9)))

    ## Songs
    amc.media["Songs"] = []
    singlesSection = self.bsdata.find("ol", {"id": "songs-list"})
    songItems = [] if singlesSection is None else singlesSection.findAll("li")
    for li in songItems:
        ref = li.find('a')
        if ref is None:
            raise ValueError("No ref in article")
        albumURL = ref.attrs['href']

        bold = ref.find("b")
        if bold is None:
            raise ValueError("No bold in ref")

        # Song codes use a larger modulus (1e10) than album codes (1e9).
        _store("Songs", bold.text, albumURL, None,
               _urlCode(albumURL, int(1e10)))

    return amc
def getData(self, inputdata):
    """Convert stored LastFM API data into an ``artistDBDataClass``.

    ``getDataBase`` and ``checkData`` are run first.  If they leave
    ``self.dbdata`` set, it is returned as is; otherwise ``self.bsdata`` must
    be a dict with "Tracks" and "Albums" sequences.  The artist name, URL
    and MBID are taken from the first track, or from the first album when
    there are no tracks.

    Only tracks with more than 50 counts and albums with more than 25 counts
    are kept, and of those only the ones whose count is at least the
    1000th-largest count.

    Args:
        inputdata: Passed through to ``getDataBase``.

    Returns:
        ``self.dbdata`` when it is set, ``None`` when there are neither
        tracks nor albums, else a new ``artistDBDataClass``.

    Raises:
        ValueError: If ``self.bsdata`` is not a dict.
    """
    self.getDataBase(inputdata)
    self.checkData()

    if self.dbdata is not None:
        return self.dbdata
    if not isinstance(self.bsdata, dict):
        raise ValueError("Could not parse LastFM API data")

    tracks = self.bsdata["Tracks"]
    albums = self.bsdata["Albums"]

    if len(tracks) > 0:
        first = tracks[0]
    elif len(albums) > 0:
        first = albums[0]
    else:
        return None

    artistName = first["artistName"]
    artistURL = first["artistURL"]
    artistMBID = first["artistMBID"]
    artistID = self.dbUtils.getArtistID(artistURL)
    generalData = None
    externalData = {"MusicBrainzID": artistMBID}

    def _popular(records, minCounts, limit=1000):
        # Keep records above ``minCounts`` whose count reaches the
        # ``limit``-th largest count among them.
        kept = []
        for record in records:
            nCounts = int(record["counts"])
            if nCounts > minCounts:
                kept.append({
                    "Name": record["name"],
                    "URL": record["URL"],
                    "Counts": nCounts
                })
        if len(kept) == 0:
            return kept
        ranked = sorted((x["Counts"] for x in kept), reverse=True)
        threshold = ranked[min(len(ranked) - 1, limit - 1)]
        return [x for x in kept if x["Counts"] >= threshold]

    def _entries(records):
        # One media entry per record; the code is derived from the md5 of
        # the name followed by the URL.
        result = []
        for record in records:
            digest = md5()
            digest.update(record['Name'].encode('utf-8'))
            digest.update(record['URL'].encode('utf-8'))
            code = str(int(digest.hexdigest(), 16) % int(1e7))
            result.append(
                artistDBMediaDataClass(album=record["Name"],
                                       url=record["URL"],
                                       artist=[artistName],
                                       code=code,
                                       year=None))
        return result

    trackData = _popular(tracks, 50)
    albumData = _popular(albums, 25)

    # A media type is only listed when it has at least one entry.
    mediaData = {}
    if len(trackData) > 0:
        mediaData["Tracks"] = _entries(trackData)
    if len(albumData) > 0:
        mediaData["Albums"] = _entries(albumData)

    media = artistDBMediaClass()
    media.media = mediaData

    return artistDBDataClass(
        artist=artistDBNameClass(name=artistName, err=None),
        meta=artistDBMetaClass(title=None, url=artistURL),
        url=artistDBURLClass(url=artistURL),
        ID=artistDBIDClass(ID=artistID),
        pages=artistDBPageClass(ppp=1, tot=1, redo=False, more=False),
        profile=artistDBProfileClass(general=generalData,
                                     external=externalData),
        mediaCounts=self.getMediaCounts(media),
        media=media,
        info=self.getInfo())
def getMedia(self, url):
    """Parse every table of the page into media entries.

    The default media type is derived from ``url``: "Unknown" when it is
    None, "Credits", "Songs" or "Compositions" when it contains the matching
    path fragment (the last match wins), else "Albums".  When the second
    header cell of a table is empty, the second cell of each row names the
    media type of that row instead.  The results of
    ``getMediaCompositions`` and ``getMediaSongs`` are appended afterwards.

    Args:
        url: Page URL string, or None.

    Returns:
        An ``artistDBMediaClass`` holding the collected entries.
    """
    amc = artistDBMediaClass()

    name = "Albums"
    if url is None:
        name = "Unknown"
    else:
        if url.find("/credits") != -1:
            name = "Credits"
        if url.find("/songs") != -1:
            name = "Songs"
        if url.find("/compositions") != -1:
            name = "Compositions"
    amc.media[name] = []

    for table in self.bsdata.findAll("table"):
        trs = table.findAll("tr")
        if len(trs) == 0:
            # A table without rows has no header to read.
            continue
        headers = [x.text.strip() for x in trs[0].findAll("th")]
        if len(headers) == 0:
            continue

        for tr in trs[1:]:
            tds = tr.findAll("td")

            ## Media type
            try:
                if len(headers[1]) == 0:
                    mediaType = tds[1].text.strip()
                    if len(mediaType) == 0:
                        mediaType = name
                else:
                    mediaType = name
            except IndexError:
                # Fewer than two columns: give up on this table.
                break

            ## Year
            try:
                year = tds[headers.index("Year")].text.strip()
            except (IndexError, ValueError):
                continue

            ## Title
            try:
                ref = tds[headers.index("Album")].findAll("a")
            except (IndexError, ValueError):
                continue

            try:
                refdata = ref[0]
                albumURL = refdata.attrs['href']
            except (IndexError, KeyError):
                # No link, or a link without href: skip the row.
                continue
            album = refdata.text.strip()

            ## Code: last path segment, text after the final "-" with its
            ## first two characters dropped; kept only when numeric.
            data = albumURL.split("/")[-1]
            pos = data.rfind("-")
            discID = data[(pos + 3):].split("/")[0]
            try:
                int(discID)
                code = discID
            except ValueError:
                code = None

            amdc = artistDBMediaDataClass(album=album,
                                          url=albumURL,
                                          aclass=None,
                                          aformat=None,
                                          artist=None,
                                          code=code,
                                          year=year)
            if amc.media.get(mediaType) is None:
                amc.media[mediaType] = []
            amc.media[mediaType].append(amdc)

    compMedia = self.getMediaCompositions()
    for mediaType, mediaTypeData in compMedia.items():
        if amc.media.get(mediaType) is None:
            amc.media[mediaType] = []
        amc.media[mediaType] += mediaTypeData

    songMedia = self.getMediaSongs()
    for mediaType, mediaTypeData in songMedia.items():
        if amc.media.get(mediaType) is None:
            amc.media[mediaType] = []
        amc.media[mediaType] += mediaTypeData

    return amc