def findMyMusic(self, primeDir=None, artistName=None):
    """Collect artist album data and cache it on ``self.artistAlbums``.

    Three modes, checked in this order:

    * neither argument given -- scan every prime directory in ``self.pdDirs``;
    * ``primeDir`` given -- scan only that prime directory (this wins even
      when ``artistName`` is also supplied);
    * only ``artistName`` given -- return ``self.getArtistPathData(artistName)``.

    In the two scanning modes the result is a dict mapping artist directory
    name to ``self.getArtistPathData(name, path)``.

    Returns:
        The collected data (also stored in ``self.artistAlbums``).
    """

    def scanPrimeDirs(primeNames):
        # Resolve every candidate prime directory under the music root first.
        candidates = {}
        for primeName in primeNames:
            candidates[primeName] = setDir(self.musicDir, primeName)

        # Keep only the ones that exist as directories.
        existing = {}
        for primeName, primePath in candidates.items():
            if dirUtil(primePath).isDir():
                existing[primeName] = primePath

        # Artist directories found below each prime directory.
        perPrime = {}
        for primeName, primePath in existing.items():
            perPrime[primeName] = findDirs(primePath)

        # Directory name -> path; a later duplicate name overwrites an
        # earlier one, exactly as a dict comprehension would.
        handles = [dirUtil(ap) for ap in getFlatList(perPrime.values())]
        located = {}
        for handle in handles:
            name, path = handle.name, handle.path
            located[name] = path

        albums = {}
        for name, path in located.items():
            albums[name] = self.getArtistPathData(name, path)
        return albums, perPrime

    artistAlbums = {}
    if primeDir is None and artistName is None:
        ts = timestat("Find PrimeDir Artist Paths")
        artistAlbums, pdArtistPaths = scanPrimeDirs(self.pdDirs)
        print(" Found {0} Artists From {1} Prime Directories".format(
            len(artistAlbums), len(pdArtistPaths)))
        ts.stop()
    elif primeDir is not None:
        ts = timestat(
            "Finding All Artist Albums From [{0}] Prime Directory".format(
                primeDir))
        artistAlbums, _ = scanPrimeDirs([primeDir])
        print(" Found {0} Artists From [{1}] Prime Directory".format(
            len(artistAlbums), primeDir))
        ts.stop()
    elif artistName is not None:
        ts = timestat("Finding [{0}] Artist Albums".format(artistName))
        artistAlbums = self.getArtistPathData(artistName)
        ts.stop()

    self.artistAlbums = artistAlbums
    return artistAlbums
def getArtistAlbumsData(self, db, dtype):
    """Return artist-ID -> albums data for database ``db`` in the form ``dtype``.

    ``dtype`` values handled:

    * ``"Master"`` -- result of ``getMasterDBArtistIDToAlbumsData()``;
    * ``"DB"`` / ``"Search"`` -- the respective album-name data with each
      entry's per-media-type dicts flattened via ``getFlatList``;
    * ``"Merged"`` / ``"User"`` -- not implemented, raises ``ValueError``.

    Any other ``dtype`` falls through and returns ``None``.
    """

    def flattenMediaAlbums(val):
        # val maps media type -> dict; only the inner dict values are kept.
        return getFlatList(
            [mediaData.values() for _, mediaData in val.items()])

    if dtype == "Master":
        return self.discs[db].getMasterDBArtistIDToAlbumsData()
    if dtype == "DB":
        albumNames = self.discs[db].getArtistIDToAlbumNamesData()
        return albumNames.apply(flattenMediaAlbums)
    if dtype == "Search":
        searchNames = self.discs[db].getArtistIDToSearchAlbumNamesData()
        return searchNames.apply(flattenMediaAlbums)
    if dtype == "Merged" or dtype == "User":
        raise ValueError("Not ready")
    return None
def findSearchTerms(self, minCnts=25):
    """Return artist names seen at least ``minCnts`` times that are not yet known.

    Every ``.p`` file found under ``self.getModValDirs()`` is loaded (files
    ending in ``datPiffKnown.p`` are skipped).  Each record's ``"ArtistName"``
    is split by ``self.mulArts.getArtistNames`` and every resulting name is
    title-cased and counted, unless it is one character or shorter or is
    already in the data loaded from ``self.knownFile``.

    Args:
        minCnts: minimum number of occurrences for a name to be returned.

    Returns:
        List of title-cased names, most frequent first.
    """
    from collections import Counter

    artistsCntr = Counter()
    known = getFile(self.knownFile)
    files = getFlatList(
        [findExt(dirval, ext='.p') for dirval in self.getModValDirs()])

    for ifile in files:
        if ifile.endswith("datPiffKnown.p"):
            continue
        records = getFile(ifile)
        rawNames = [record["ArtistName"] for record in records]
        for rawName in rawNames:
            # A single raw name may expand to several individual artists.
            splitNames = self.mulArts.getArtistNames(rawName)
            for singleName in splitNames.keys():
                key = singleName.title()
                if len(key) > 1 and key not in known:
                    artistsCntr[key] += 1

    searchTerms = [
        name for name, count in artistsCntr.most_common() if count >= minCnts
    ]
    print("There are {0} new searches".format(len(searchTerms)))
    return searchTerms
def getMatchedArtistAlbumsFromDB(self, dbartist, merge=True):
    """Gather the album names of ``dbartist`` from every DB it is matched in.

    For each DB listed under ``"Matched"`` by ``self.getArtistDBMatchLists``,
    the artist's albums are fetched by ID and bucketed into album types
    1-4 according to ``self.mdb.getDBAlbumTypeNames(db, albumType)``.
    Duplicates within a bucket are removed (bucket order is therefore not
    preserved).

    Args:
        dbartist: artist key understood by ``self.mdb``.
        merge: when exactly ``True`` the four buckets are flattened into one
            list via ``getFlatList``; otherwise the ``{albumType: [names]}``
            dict is returned.

    Raises:
        ValueError: if a matched DB entry has no retrievable ``"ID"``.
    """
    dbArtistData = self.mdb.getArtistData(dbartist)
    dbsToSearch = self.getArtistDBMatchLists(dbartist)

    albumTypes = [1, 2, 3, 4]
    albumTypesData = {albumType: [] for albumType in albumTypes}

    for db in dbsToSearch["Matched"]:
        dbIDdata = dbArtistData[db]
        try:
            dbID = dbIDdata["ID"]
        except Exception as err:
            # Not a bare ``except``: KeyboardInterrupt/SystemExit must not be
            # turned into a ValueError.  The original error is kept as cause.
            raise ValueError(
                "This db {0} should already be known for {1}".format(
                    db, dbartist)) from err

        dbAlbumsData = self.mdb.getArtistAlbumsFromID(db, dbID)
        for albumType in albumTypes:
            # Looked up once per album type rather than once per media type.
            typeNames = self.mdb.getDBAlbumTypeNames(db, albumType)
            for mediaType, mediaTypeAlbums in dbAlbumsData.items():
                if mediaType in typeNames:
                    albumTypesData[albumType] += list(
                        mediaTypeAlbums.values())

    albumTypesData = {k: list(set(v)) for k, v in albumTypesData.items()}

    if merge is True:
        return getFlatList(albumTypesData.values())
    return albumTypesData
def manuallyMatchUnknownArtist(self, unknownArtist, cutoff=0.8):
    """Print candidate DB matches for ``unknownArtist`` for manual review.

    Shows the artist's unmatched albums, then for every DB / candidate name /
    candidate ID returned by ``self.mdb.getArtistIDs`` prints a ready-to-paste
    ``mdb.add("<artist>", "<db>", "<id>")`` line followed by that ID's albums.

    Args:
        unknownArtist: artist name to look up.
        cutoff: forwarded to ``self.mdb.getArtistIDs``.

    Returns:
        None; all output goes to stdout.
    """
    ######################################################################
    #### Get Unknown Artist Albums and Potential DB Artists
    ######################################################################
    unMatchedAlbums = self.mmb.getUnMatchedAlbumsByArtist(unknownArtist)
    artistNameDBIDs = self.mdb.getArtistIDs(unknownArtist, cutoff=cutoff)

    print("Unknown Artist: {0}".format(unknownArtist))
    try:
        print("UnMatched Albums: {0}".format(", ".join(unMatchedAlbums)))
    except Exception:
        # Best effort: fall back to the raw value if the albums cannot be
        # joined or printed.  Narrowed from a bare ``except`` so Ctrl-C and
        # SystemExit are no longer swallowed here.
        print("Could not show the unMatched Albums below:")
        print("-> ", unMatchedAlbums, " <-")
    print("=" * 50)
    print(artistNameDBIDs)

    for db, artistDBartists in artistNameDBIDs.items():
        print("=" * 50)
        print(" {0}".format(db))
        for artistDBartist, artistDBIDs in artistDBartists.items():
            print(" {0}".format(artistDBartist))
            for artistDBID in artistDBIDs:
                artistDBAlbumsFromID = self.mdb.getArtistAlbumsFromID(
                    db, artistDBID)
                albums = [
                    list(mediaTypeAlbums.values())
                    for mediaTypeAlbums in artistDBAlbumsFromID.values()
                ]
                print("mdb.add(\"{0}\", \"{1}\", \"{2}\")".format(
                    unknownArtist, db, artistDBID))
                print(" {0: <45}\t{1}".format(
                    artistDBID, getFlatList(albums)))
def createMasterDBArtistAlbumsDataFrame(self):
    """Build and save the flattened artist-ID -> albums data and album counts.

    Loads ``self.disc.getArtistIDToAlbumNamesData()``, flattens each entry's
    per-media-type dicts into a single list with ``getFlatList``, and saves:

    * ``MasterArtistIDToAlbums.p`` -- the flattened album lists;
    * ``MasterArtistIDToNumAlbums.p`` -- the per-artist list lengths, with
      the series name set to ``"NumAlbums"``.

    Both files are written under ``self.disc.getDiscogDBDir()``.

    NOTE(review): an older DataFrame export used to follow an unconditional
    ``return`` here and could never run; that unreachable code was removed.

    Returns:
        None.
    """
    ts = timestat("=================================== Creating Artist Album DB ===================================")

    print("Loading ArtistID Data")
    artistIDtoAlbumNames = self.disc.getArtistIDToAlbumNamesData()

    print("Creating Flattened List for {0} Artists".format(artistIDtoAlbumNames.shape[0]))
    artistIDToAlbumNamesData = artistIDtoAlbumNames.apply(
        lambda val: getFlatList(
            [mediaData.values() for mediaType, mediaData in val.items()]))

    # Album counts are needed for both the log line and the second file,
    # so compute them once.
    artistIDToNumAlbumsData = artistIDToAlbumNamesData.apply(len)

    savename = setFile(self.disc.getDiscogDBDir(), "MasterArtistIDToAlbums.p")
    print("Saving {0}/{1} artists/albums to {2}".format(
        len(artistIDToAlbumNamesData), artistIDToNumAlbumsData.sum(), savename))
    saveFile(ifile=savename, idata=artistIDToAlbumNamesData)

    artistIDToNumAlbumsData.name = "NumAlbums"
    savename = setFile(self.disc.getDiscogDBDir(), "MasterArtistIDToNumAlbums.p")
    print("Saving {0}/{1} artists/albums to {2}".format(
        len(artistIDToNumAlbumsData), artistIDToNumAlbumsData.sum(), savename))
    saveFile(ifile=savename, idata=artistIDToNumAlbumsData)

    ts.stop()
def getMappingDirs(self):
    """Return every value of ``self.directoryMapping`` flattened by ``getFlatList``."""
    mappedDirGroups = self.directoryMapping.values()
    return getFlatList(mappedDirGroups)
def getMyOtherMusicAlbums(self, dirval, mname):
    """Return the directories found under each ``mname``-mapped sub-directory.

    For every entry of ``self.mdirmap.getMapping(mname)`` the directories
    reported by ``findDirs(setDir(dirval, entry))`` are collected; the
    per-entry lists are then flattened with ``getFlatList``.
    """
    albumDirGroups = []
    for mdirval in self.mdirmap.getMapping(mname):
        mappedDir = setDir(dirval, mdirval)
        albumDirGroups.append(list(findDirs(mappedDir)))
    return getFlatList(albumDirGroups)
def getMyMatchedMusicAlbums(self, dirval):
    """Return the sub-directories found under each ``"Match"``-mapped directory.

    For every entry of ``self.mdirmap.getMapping("Match")`` the results of
    ``findSubDirs(setDir(dirval, entry), "*")`` are collected; the per-entry
    lists are then flattened with ``getFlatList``.
    """
    albumDirGroups = []
    for mdirval in self.mdirmap.getMapping("Match"):
        matchedDir = setDir(dirval, mdirval)
        albumDirGroups.append(list(findSubDirs(matchedDir, "*")))
    return getFlatList(albumDirGroups)
def searchForMutualDBEntries(self, cutoff=0.875, maxAdds=50, start=None, modVal=100, dbs=None):
    """Suggest DB IDs for artists in the DBs where they are still unmatched.

    For each artist from ``self.mdb.getArtists()``, the albums already known
    from matched DBs are compared (``matchAlbums``) against the albums of
    every candidate ID in each unmatched DB of interest.  The candidate with
    the most near matches (ties broken by higher score) is printed as a
    ready-to-paste ``mdb.add("<artist>", "<db>", "<id>")`` line.

    Args:
        cutoff: passed to ``self.mdb.getArtistDBIDs`` and ``matchAlbums``.
        maxAdds: stop once this many suggestions have been printed (checked
            at the start of each artist).
        start: if given, skip artists whose index is below it.
        modVal: print a progress line every ``modVal`` artists.
        dbs: DB names to consider; ``None`` selects Discogs, MusicBrainz,
            AllMusic and LastFM.

    Returns:
        None; suggestions are printed to stdout.
    """
    ######################################################################
    #### Get Map of Artists and Unmatched Albums
    ######################################################################
    dbartists = self.mdb.getArtists()

    # The original test was inverted: the default ``dbs=None`` left
    # ``usefulDBs`` as None (TypeError in ``set(None)``) and an explicit
    # ``dbs`` was silently replaced by the defaults.
    if dbs is None:
        usefulDBs = ['Discogs', 'MusicBrainz', 'AllMusic', 'LastFM']
    else:
        usefulDBs = dbs
    usefulDBs = set(usefulDBs)

    albumTypes = [1, 2, 3, 4]
    cnts = 0
    print("Searching for mutual DB matches for {0} artists".format(
        len(dbartists)))

    for ia, dbartist in enumerate(dbartists):
        if start is not None and ia < start:
            continue
        if ia % modVal == 0:
            print("## {0: <20} -- {1}".format(
                "{0}/{1}".format(ia, len(dbartists)), dbartist))
        if cnts >= maxAdds:
            break

        artistAlbums = self.getMatchedArtistAlbumsFromDB(dbartist, merge=True)
        dbsToSearch = self.getArtistDBMatchLists(dbartist)
        usefulDBsToSearch = list(
            set(dbsToSearch["Unmatched"]).intersection(usefulDBs))

        ########################################################
        ## Loop Over Unmatched DBs
        ########################################################
        for db in usefulDBsToSearch:
            dbMatches = {}
            artistDBartists = self.mdb.getArtistDBIDs(dbartist,
                                                      db,
                                                      num=10,
                                                      cutoff=cutoff,
                                                      debug=False)
            for artistDBIDs in artistDBartists.values():
                for artistDBID in artistDBIDs:
                    artistDBAlbumsFromID = self.mdb.getArtistAlbumsFromID(
                        db, artistDBID)

                    # Bucket this candidate's albums by album type,
                    # de-duplicated per bucket.
                    albumTypesData = {}
                    for albumType in albumTypes:
                        typeNames = self.mdb.getDBAlbumTypeNames(
                            db, albumType)
                        typeAlbums = []
                        for mediaType, mediaTypeAlbums in artistDBAlbumsFromID.items():
                            if mediaType in typeNames:
                                typeAlbums += list(mediaTypeAlbums.values())
                        albumTypesData[albumType] = list(set(typeAlbums))

                    dbArtistAlbums = getFlatList(albumTypesData.values())

                    ma = matchAlbums(cutoff=cutoff)
                    ma.match(artistAlbums, dbArtistAlbums)
                    dbMatches[artistDBID] = ma

            # Pick the candidate with most near matches; on a tie the
            # strictly higher score wins, otherwise the earlier one stays.
            bestMatch = {"ID": None, "Matches": 0, "Score": 0.0}
            for artistDBID, ma in dbMatches.items():
                if ma.near == 0:
                    continue
                isBetter = ma.near > bestMatch["Matches"] or (
                    ma.near == bestMatch["Matches"]
                    and ma.score > bestMatch["Score"])
                if isBetter:
                    bestMatch = {
                        "ID": artistDBID,
                        "Matches": ma.near,
                        "Score": ma.score
                    }

            if bestMatch["ID"] is not None:
                cnts += 1
                print("mdb.add(\"{0}\", \"{1}\", \"{2}\")".format(
                    dbartist, db, bestMatch["ID"]))
def searchForMutualDBEntriesByDB(self, db, cutoff=0.875, maxAdds=50, start=None, modVal=100, maxAlbumsForSearch=500):
    """Suggest ``db`` IDs for artists that have no ID in ``db`` yet.

    For each artist from ``self.mdb.getArtists()`` whose
    ``self.mdb.getArtistDBData(artist, db)["ID"]`` is ``None``, the albums
    known from matched DBs are compared (``matchAlbums``) with the albums of
    every candidate ID in ``db``.  The candidate with the most near matches
    (ties broken by higher score) is printed as
    ``mdb.add("<artist>", "<db>", "<id>")``.

    If any candidate has more than ``maxAlbumsForSearch`` albums, a notice is
    printed and no suggestion is made for that artist.

    Args:
        db: database name to search.
        cutoff: passed to ``self.mdb.getArtistDBIDs`` and ``matchAlbums``.
        maxAdds: stop once this many suggestions have been printed (checked
            at the start of each artist).
        start: if given, skip artists whose index is below it.
        modVal: print a progress line every ``modVal`` artists.
        maxAlbumsForSearch: album-count limit per candidate.

    Returns:
        None; suggestions are printed to stdout.
    """
    ######################################################################
    #### Get Map of Artists and Unmatched Albums
    ######################################################################
    dbartists = self.mdb.getArtists()
    numArtists = len(dbartists)

    added = 0
    print("Searching for mutual DB matches for {0} artists".format(
        numArtists))

    for index, dbartist in enumerate(dbartists):
        if start is not None and index < start:
            continue
        if index % modVal == 0:
            progress = "{0}/{1}".format(index, len(dbartists))
            print("## {0: <20} -- {1}".format(progress, dbartist))
        if added >= maxAdds:
            break

        # Artists that already carry an ID in this DB need no suggestion.
        if self.mdb.getArtistDBData(dbartist, db)["ID"] is not None:
            continue

        knownAlbums = self.getMatchedArtistAlbumsFromDB(dbartist, merge=True)

        ########################################################
        ## Loop Over Unmatched DBs
        ########################################################
        matchers = {}
        candidates = self.mdb.getArtistDBIDs(dbartist,
                                             db,
                                             num=10,
                                             cutoff=cutoff,
                                             debug=False)
        oversized = False
        for _, candidateIDs in candidates.items():
            # Once an oversized candidate was seen, later names are skipped.
            if oversized:
                break
            for candidateID in candidateIDs:
                albumsFromID = self.mdb.getArtistAlbumsFromID(db, candidateID)

                # Bucket by album type (1-4), de-duplicating each bucket.
                albumsByType = {}
                for albumType in [1, 2, 3, 4]:
                    collected = []
                    for mediaType, mediaTypeAlbums in albumsFromID.items():
                        if mediaType in self.mdb.getDBAlbumTypeNames(
                                db, albumType):
                            collected.extend(mediaTypeAlbums.values())
                    albumsByType[albumType] = list(set(collected))
                candidateAlbums = getFlatList(albumsByType.values())

                if len(candidateAlbums) > maxAlbumsForSearch:
                    # Remaining IDs under this same name are still
                    # processed; only the final suggestion is suppressed.
                    oversized = True
                    print(
                        "#\tNot checking {0} because there are {1} > {2} albums"
                        .format(dbartist, len(candidateAlbums),
                                maxAlbumsForSearch))
                    continue

                matcher = matchAlbums(cutoff=cutoff)
                matcher.match(knownAlbums, candidateAlbums)
                matchers[candidateID] = matcher

        if oversized:
            continue

        # Most near matches wins; an equal count needs a strictly higher
        # score to replace the current best.
        bestID, bestNear, bestScore = None, 0, 0.0
        for candidateID, matcher in matchers.items():
            if matcher.near == 0:
                continue
            if matcher.near > bestNear:
                bestID, bestNear, bestScore = (candidateID, matcher.near,
                                               matcher.score)
            elif matcher.near == bestNear and matcher.score > bestScore:
                bestID, bestNear, bestScore = (candidateID, matcher.near,
                                               matcher.score)

        if bestID is not None:
            added += 1
            print("mdb.add(\"{0}\", \"{1}\", \"{2}\")".format(
                dbartist, db, bestID))
def getMatchedPrimeAlbumDirs(self, primeDir, matchedDirs):
    """Return ``(name, path)`` pairs for directories under ``primeDir``.

    For each entry of ``matchedDirs`` the directories found by
    ``findDirs(setDir(entry, primeDir))`` are gathered and flattened.  Each
    path is paired with the last element of ``getDirBasics(path)``,
    NFC-normalized (``normalize`` is presumably ``unicodedata.normalize``).

    Returns:
        List of ``(normalizedName, path)`` tuples.
    """
    dirGroups = []
    for matchedDir in matchedDirs:
        dirGroups.append(findDirs(setDir(matchedDir, primeDir)))
    dirvals = getFlatList(dirGroups)

    artistNames = []
    for dirval in dirvals:
        lastPart = getDirBasics(dirval)[-1]
        artistNames.append(normalize('NFC', lastPart))

    return list(zip(artistNames, dirvals))
def getArtistAlbumTypesByArtist(self, artistName, albumType):
    """Return the flattened albums of one album type for ``artistName``.

    Looks up ``self.getArtistAlbumsByArtist(artistName)``; when it has no
    truthy entry for ``albumType`` the result is ``None``, otherwise the
    values of that entry's ``albums`` mapping are flattened with
    ``getFlatList``.
    """
    artistData = self.getArtistAlbumsByArtist(artistName)
    if not artistData.get(albumType):
        return None
    typeAlbums = artistData[albumType].albums.values()
    return getFlatList(typeAlbums)