Ejemplo n.º 1
0
    def findMyMusic(self, primeDir=None, artistName=None):
        """Collect album data for artists found under the music directory.

        The mode is chosen in this order:

        * neither argument given: scan every prime directory listed in
          ``self.pdDirs``;
        * ``primeDir`` given: scan only that prime directory
          (``artistName`` is ignored in this mode);
        * only ``artistName`` given: look up that single artist through
          ``self.getArtistPathData(artistName)``.

        In the two scanning modes the result is a dict mapping each artist
        directory name to ``self.getArtistPathData(name, path)``. In the
        single-artist mode it is whatever ``self.getArtistPathData`` returns.

        The result is stored on ``self.artistAlbums`` and also returned.
        """
        if primeDir is None and artistName is not None:
            ts = timestat("Finding [{0}] Artist Albums".format(artistName))
            artistAlbums = self.getArtistPathData(artistName)
            ts.stop()
            self.artistAlbums = artistAlbums
            return artistAlbums

        # Both scanning modes share one code path; they differ only in the
        # list of prime directories and in the messages they print.
        scanAll = primeDir is None
        if scanAll:
            ts = timestat("Find PrimeDir Artist Paths")
            primeDirs = self.pdDirs
        else:
            ts = timestat(
                "Finding All Artist Albums From [{0}] Prime Directory".format(
                    primeDir))
            primeDirs = [primeDir]

        # Keep only the prime directories for which isDir() is true.
        candidatePaths = {pd: setDir(self.musicDir, pd) for pd in primeDirs}
        pdPaths = {
            pd: pdpath
            for pd, pdpath in candidatePaths.items()
            if dirUtil(pdpath).isDir()
        }
        pdArtistPaths = {
            pd: findDirs(pdpath)
            for pd, pdpath in pdPaths.items()
        }

        # Keyed by directory name: a later directory with the same name
        # replaces an earlier one.
        artistDirs = [
            dirUtil(ap) for ap in getFlatList(pdArtistPaths.values())
        ]
        artistPaths = {fsap.name: fsap.path for fsap in artistDirs}
        artistAlbums = {
            name: self.getArtistPathData(name, path)
            for name, path in artistPaths.items()
        }

        if scanAll:
            print("  Found {0} Artists From {1} Prime Directories".format(
                len(artistAlbums), len(pdArtistPaths)))
        else:
            print("  Found {0} Artists From [{1}] Prime Directory".format(
                len(artistAlbums), primeDir))
        ts.stop()

        self.artistAlbums = artistAlbums
        return artistAlbums
Ejemplo n.º 2
0
 def getArtistAlbumsData(self, db, dtype):
     """Return artist-ID -> albums data from ``self.discs[db]`` for ``dtype``.

     ``"Master"`` returns the handler's master data unchanged. ``"DB"`` and
     ``"Search"`` take the handler's album-name data and call its
     ``.apply`` with a function that flattens the values of every
     media-type mapping through ``getFlatList`` (presumably a pandas
     Series; confirm against the handler).

     ``"Merged"`` and ``"User"`` raise ``ValueError("Not ready")``. Any
     other ``dtype`` returns ``None``.
     """
     def flattenMediaAlbums(val):
         # One collection of album names per media type, flattened together.
         return getFlatList(
             [albums.values() for _mediaType, albums in val.items()])

     if dtype in ("Merged", "User"):
         raise ValueError("Not ready")

     if dtype == "Master":
         return self.discs[db].getMasterDBArtistIDToAlbumsData()

     if dtype == "DB":
         albumNames = self.discs[db].getArtistIDToAlbumNamesData()
         return albumNames.apply(flattenMediaAlbums)

     if dtype == "Search":
         searchAlbumNames = self.discs[db].getArtistIDToSearchAlbumNamesData()
         return searchAlbumNames.apply(flattenMediaAlbums)

     return None
Ejemplo n.º 3
0
    def findSearchTerms(self, minCnts=25):
        """Return frequently seen artist names that are not yet known.

        Every ``.p`` file found under ``self.getModValDirs()`` is loaded,
        except files whose name ends with ``datPiffKnown.p``. Each entry's
        ``"ArtistName"`` is split by ``self.mulArts.getArtistNames`` and
        every resulting name is title-cased and counted, provided it is
        longer than one character and not contained in the data loaded
        from ``self.knownFile``.

        Args:
            minCnts: minimum number of occurrences a name needs to be
                returned.

        Returns:
            The qualifying names, most frequent first.
        """
        from collections import Counter

        artistsCntr = Counter()
        known = getFile(self.knownFile)

        files = getFlatList([findExt(dirval, ext='.p') for dirval in self.getModValDirs()])
        for ifile in files:
            if ifile.endswith("datPiffKnown.p"):
                continue
            for entry in getFile(ifile):
                names = self.mulArts.getArtistNames(entry["ArtistName"])
                for name in names.keys():
                    key = name.title()
                    if len(key) > 1 and key not in known:
                        artistsCntr[key] += 1

        searchTerms = [term for term, cnt in artistsCntr.most_common() if cnt >= minCnts]
        print("There are {0} new searches".format(len(searchTerms)))
        return searchTerms
Ejemplo n.º 4
0
    def getMatchedArtistAlbumsFromDB(self, dbartist, merge=True):
        """Gather ``dbartist``'s album names from every matched database.

        For each database listed under ``"Matched"`` by
        ``self.getArtistDBMatchLists(dbartist)``, the artist's ID is read
        from ``self.mdb.getArtistData(dbartist)`` and the albums are fetched
        with ``self.mdb.getArtistAlbumsFromID``. Albums are grouped into
        album types 1-4 according to ``self.mdb.getDBAlbumTypeNames`` and
        de-duplicated per type (order is not preserved).

        Args:
            dbartist: artist name as known to ``self.mdb``.
            merge: when exactly ``True``, return one flat list made with
                ``getFlatList``; otherwise return the per-type dict.

        Returns:
            A flat list of album names, or a dict ``{1: [...], ..., 4: [...]}``.

        Raises:
            ValueError: if a matched database entry does not provide an
                ``"ID"``.
        """
        dbArtistData = self.mdb.getArtistData(dbartist)
        dbsToSearch = self.getArtistDBMatchLists(dbartist)
        albumTypesData = {k: [] for k in [1, 2, 3, 4]}
        for db in dbsToSearch["Matched"]:
            dbIDdata = dbArtistData[db]
            try:
                dbID = dbIDdata["ID"]
            except (LookupError, TypeError) as err:
                # A matched DB must carry an ID; keep the original cause.
                raise ValueError(
                    "This db {0} should already be known for {1}".format(
                        db, dbartist)) from err

            dbAlbumsData = self.mdb.getArtistAlbumsFromID(db, dbID)

            for albumType, albums in albumTypesData.items():
                # The media-type names depend only on (db, albumType), so
                # look them up once per album type.
                typeNames = self.mdb.getDBAlbumTypeNames(db, albumType)
                for mediaType, mediaTypeAlbums in dbAlbumsData.items():
                    if mediaType in typeNames:
                        albums.extend(mediaTypeAlbums.values())

        albumTypesData = {k: list(set(v)) for k, v in albumTypesData.items()}

        if merge is True:
            return getFlatList(albumTypesData.values())
        return albumTypesData
Ejemplo n.º 5
0
    def manuallyMatchUnknownArtist(self, unknownArtist, cutoff=0.8):
        """Print candidate database IDs for an artist that is not matched yet.

        Shows the artist's unmatched albums (from ``self.mmb``) and, for
        every database and candidate artist returned by
        ``self.mdb.getArtistIDs(unknownArtist, cutoff=cutoff)``, a ready to
        paste ``mdb.add(...)`` line followed by the candidate's albums.

        Nothing is returned; all output goes to stdout.
        """
        unMatchedAlbums = self.mmb.getUnMatchedAlbumsByArtist(unknownArtist)
        artistNameDBIDs = self.mdb.getArtistIDs(unknownArtist, cutoff=cutoff)

        print("Unknown Artist:   {0}".format(unknownArtist))
        try:
            print("UnMatched Albums: {0}".format(", ".join(unMatchedAlbums)))
        except Exception:
            # Best effort: fall back to the raw value when the albums cannot
            # be joined or printed, but let KeyboardInterrupt/SystemExit
            # propagate.
            print("Could not show the unMatched Albums below:")
            print("-> ", unMatchedAlbums, " <-")
        print("=" * 50)
        print(artistNameDBIDs)
        for db, artistDBartists in artistNameDBIDs.items():
            print("=" * 50)
            print("   {0}".format(db))
            for artistDBartist, artistDBIDs in artistDBartists.items():
                print("      {0}".format(artistDBartist))
                for artistDBID in artistDBIDs:
                    artistDBAlbumsFromID = self.mdb.getArtistAlbumsFromID(
                        db, artistDBID)
                    # One list of album names per media type.
                    albums = [
                        list(mediaTypeAlbums.values())
                        for mediaTypeAlbums in artistDBAlbumsFromID.values()
                    ]
                    print("mdb.add(\"{0}\", \"{1}\", \"{2}\")".format(
                        unknownArtist, db, artistDBID))
                    print("         {0: <45}\t{1}".format(
                        artistDBID, getFlatList(albums)))
Ejemplo n.º 6
0
    def createMasterDBArtistAlbumsDataFrame(self):
        """Build and save the flattened artist-ID -> albums master data.

        Loads ``self.disc.getArtistIDToAlbumNamesData()``, flattens the
        album names of every media type into one collection per artist via
        ``getFlatList``, and saves two files into
        ``self.disc.getDiscogDBDir()``:

        * ``MasterArtistIDToAlbums.p``: the flattened albums per artist;
        * ``MasterArtistIDToNumAlbums.p``: the number of albums per artist,
          named ``"NumAlbums"``.

        Returns ``None``.

        NOTE: an older DataFrame export that followed an unconditional
        ``return`` was unreachable and has been removed.
        """
        ts = timestat("=================================== Creating Artist Album DB ===================================")

        print("Loading ArtistID Data")
        artistIDtoAlbumNames = self.disc.getArtistIDToAlbumNamesData()

        print("Creating Flattened List for {0} Artists".format(artistIDtoAlbumNames.shape[0]))
        artistIDToAlbumNamesData = artistIDtoAlbumNames.apply(
            lambda val: getFlatList([mediaData.values() for mediaType, mediaData in val.items()]))

        # Album counts are needed for both log lines and the second file,
        # so compute them once.
        artistIDToNumAlbumsData = artistIDToAlbumNamesData.apply(len)
        artistIDToNumAlbumsData.name = "NumAlbums"
        totalAlbums = artistIDToNumAlbumsData.sum()

        savename = setFile(self.disc.getDiscogDBDir(), "MasterArtistIDToAlbums.p")
        print("Saving {0}/{1} artists/albums to {2}".format(len(artistIDToAlbumNamesData), totalAlbums, savename))
        saveFile(ifile=savename, idata=artistIDToAlbumNamesData)

        savename = setFile(self.disc.getDiscogDBDir(), "MasterArtistIDToNumAlbums.p")
        print("Saving {0}/{1} artists/albums to {2}".format(len(artistIDToNumAlbumsData), totalAlbums, savename))
        saveFile(ifile=savename, idata=artistIDToNumAlbumsData)

        ts.stop()
Ejemplo n.º 7
0
 def getMappingDirs(self):
     """Return all values of ``self.directoryMapping`` flattened by ``getFlatList``."""
     mappedDirGroups = self.directoryMapping.values()
     return getFlatList(mappedDirGroups)
Ejemplo n.º 8
0
 def getMyOtherMusicAlbums(self, dirval, mname):
     """Return the directories found below each ``mname`` mapping of ``dirval``.

     For every sub-directory name that ``self.mdirmap.getMapping(mname)``
     yields, the result of ``findDirs`` on that sub-directory of ``dirval``
     is collected; the per-mapping lists are flattened with ``getFlatList``.
     """
     dirsPerMapping = []
     for mdirval in self.mdirmap.getMapping(mname):
         mappedDir = setDir(dirval, mdirval)
         dirsPerMapping.append(list(findDirs(mappedDir)))
     return getFlatList(dirsPerMapping)
Ejemplo n.º 9
0
 def getMyMatchedMusicAlbums(self, dirval):
     """Return the ``"*"`` sub-directories below each "Match" mapping of ``dirval``.

     For every sub-directory name that ``self.mdirmap.getMapping("Match")``
     yields, the result of ``findSubDirs(..., "*")`` on that sub-directory
     of ``dirval`` is collected; the per-mapping lists are flattened with
     ``getFlatList``.
     """
     subDirsPerMapping = []
     for mdirval in self.mdirmap.getMapping("Match"):
         matchedDir = setDir(dirval, mdirval)
         subDirsPerMapping.append(list(findSubDirs(matchedDir, "*")))
     return getFlatList(subDirsPerMapping)
Ejemplo n.º 10
0
    def searchForMutualDBEntries(self,
                                 cutoff=0.875,
                                 maxAdds=50,
                                 start=None,
                                 modVal=100,
                                 dbs=None):
        """Print ``mdb.add(...)`` suggestions for artists missing in some DBs.

        For every artist from ``self.mdb.getArtists()`` the albums already
        known from matched databases are compared, via ``matchAlbums``,
        with the albums of each candidate ID in every unmatched database
        of interest. The candidate with the most near matches (ties broken
        by the higher score) is printed as an ``mdb.add(...)`` line.

        Args:
            cutoff: similarity cutoff passed to ``getArtistDBIDs`` and
                ``matchAlbums``.
            maxAdds: stop once this many suggestions have been printed
                (checked at the start of each artist).
            start: if given, skip artists whose index is below it.
            modVal: print a progress line every ``modVal`` artists.
            dbs: databases to consider; ``None`` selects Discogs,
                MusicBrainz, AllMusic and LastFM.

        Returns:
            ``None``; all output goes to stdout.
        """
        dbartists = self.mdb.getArtists()
        cnts = 0
        print("Searching for mutual DB matches for {0} artists".format(
            len(dbartists)))
        for ia, dbartist in enumerate(dbartists):
            if start is not None and ia < start:
                continue
            if ia % modVal == 0:
                print("## {0: <20} -- {1}".format(
                    "{0}/{1}".format(ia, len(dbartists)), dbartist))
            if cnts >= maxAdds:
                break
            artistAlbums = self.getMatchedArtistAlbumsFromDB(dbartist,
                                                             merge=True)
            dbsToSearch = self.getArtistDBMatchLists(dbartist)

            # Use the defaults only when the caller did not choose databases.
            # (The condition used to be inverted, which ignored ``dbs`` and
            # crashed on ``set(None)`` with the default arguments.)
            if dbs is None:
                usefulDBs = ['Discogs', 'MusicBrainz', 'AllMusic', 'LastFM']
            else:
                usefulDBs = dbs
            usefulDBsToSearch = list(
                set(dbsToSearch["Unmatched"]).intersection(set(usefulDBs)))

            ########################################################
            ## Loop Over Unmatched DBs
            ########################################################
            for db in usefulDBsToSearch:
                dbMatches = {}
                artistDBartists = self.mdb.getArtistDBIDs(dbartist,
                                                          db,
                                                          num=10,
                                                          cutoff=cutoff,
                                                          debug=False)

                for artistDBartist, artistDBIDs in artistDBartists.items():
                    for artistDBID in artistDBIDs:
                        artistDBAlbumsFromID = self.mdb.getArtistAlbumsFromID(
                            db, artistDBID)

                        # Group the candidate's albums into album types 1-4.
                        albumTypesData = {k: [] for k in [1, 2, 3, 4]}
                        for albumType, albums in albumTypesData.items():
                            typeNames = self.mdb.getDBAlbumTypeNames(
                                db, albumType)
                            for mediaType, mediaTypeAlbums in artistDBAlbumsFromID.items():
                                if mediaType in typeNames:
                                    albums.extend(mediaTypeAlbums.values())

                        albumTypesData = {
                            k: list(set(v))
                            for k, v in albumTypesData.items()
                        }
                        dbArtistAlbums = getFlatList(albumTypesData.values())

                        ma = matchAlbums(cutoff=cutoff)
                        ma.match(artistAlbums, dbArtistAlbums)
                        dbMatches[artistDBID] = ma

                if not dbMatches:
                    continue

                # Most near matches wins; equal counts fall back to score.
                bestMatch = {"ID": None, "Matches": 0, "Score": 0.0}
                for artistDBID, ma in dbMatches.items():
                    if ma.near == 0:
                        continue
                    moreMatches = ma.near > bestMatch["Matches"]
                    betterScore = (ma.near == bestMatch["Matches"]
                                   and ma.score > bestMatch["Score"])
                    if moreMatches or betterScore:
                        bestMatch = {
                            "ID": artistDBID,
                            "Matches": ma.near,
                            "Score": ma.score
                        }

                if bestMatch["ID"] is not None:
                    cnts += 1
                    print("mdb.add(\"{0}\", \"{1}\", \"{2}\")".format(
                        dbartist, db, bestMatch["ID"]))
Ejemplo n.º 11
0
    def searchForMutualDBEntriesByDB(self,
                                     db,
                                     cutoff=0.875,
                                     maxAdds=50,
                                     start=None,
                                     modVal=100,
                                     maxAlbumsForSearch=500):
        """Print ``mdb.add(...)`` suggestions for artists missing in ``db``.

        Artists from ``self.mdb.getArtists()`` that already have an ID in
        ``db`` are skipped. For the others, the albums known from matched
        databases are compared, via ``matchAlbums``, with the albums of
        each candidate ID in ``db``. The candidate with the most near
        matches (ties broken by the higher score) is printed as an
        ``mdb.add(...)`` line.

        An artist is abandoned as soon as one candidate has more than
        ``maxAlbumsForSearch`` albums; the remaining candidates are then
        not fetched, since no suggestion is made for that artist anyway.

        Args:
            db: database to search for missing artists.
            cutoff: similarity cutoff passed to ``getArtistDBIDs`` and
                ``matchAlbums``.
            maxAdds: stop once this many suggestions have been printed.
            start: if given, skip artists whose index is below it.
            modVal: print a progress line every ``modVal`` artists.
            maxAlbumsForSearch: candidate album count above which the
                artist is skipped.

        Returns:
            ``None``; all output goes to stdout.
        """
        dbartists = self.mdb.getArtists()
        cnts = 0
        print("Searching for mutual DB matches for {0} artists".format(
            len(dbartists)))
        for ia, dbartist in enumerate(dbartists):
            if start is not None and ia < start:
                continue
            if ia % modVal == 0:
                print("## {0: <20} -- {1}".format(
                    "{0}/{1}".format(ia, len(dbartists)), dbartist))
            if cnts >= maxAdds:
                break

            status = self.mdb.getArtistDBData(dbartist, db)
            if status["ID"] is not None:
                continue
            artistAlbums = self.getMatchedArtistAlbumsFromDB(dbartist,
                                                             merge=True)

            ########################################################
            ## Loop Over Candidate Artists In This DB
            ########################################################
            dbMatches = {}
            artistDBartists = self.mdb.getArtistDBIDs(dbartist,
                                                      db,
                                                      num=10,
                                                      cutoff=cutoff,
                                                      debug=False)
            tooManyAlbums = False
            for artistDBartist, artistDBIDs in artistDBartists.items():
                for artistDBID in artistDBIDs:
                    artistDBAlbumsFromID = self.mdb.getArtistAlbumsFromID(
                        db, artistDBID)

                    # Group the candidate's albums into album types 1-4.
                    albumTypesData = {k: [] for k in [1, 2, 3, 4]}
                    for albumType, albums in albumTypesData.items():
                        typeNames = self.mdb.getDBAlbumTypeNames(
                            db, albumType)
                        for mediaType, mediaTypeAlbums in artistDBAlbumsFromID.items():
                            if mediaType in typeNames:
                                albums.extend(mediaTypeAlbums.values())

                    albumTypesData = {
                        k: list(set(v))
                        for k, v in albumTypesData.items()
                    }
                    dbArtistAlbums = getFlatList(albumTypesData.values())
                    if len(dbArtistAlbums) > maxAlbumsForSearch:
                        tooManyAlbums = True
                        print(
                            "#\tNot checking {0} because there are {1} > {2} albums"
                            .format(dbartist, len(dbArtistAlbums),
                                    maxAlbumsForSearch))
                        break

                    ma = matchAlbums(cutoff=cutoff)
                    ma.match(artistAlbums, dbArtistAlbums)
                    dbMatches[artistDBID] = ma

                if tooManyAlbums:
                    break

            if tooManyAlbums or not dbMatches:
                continue

            # Most near matches wins; equal counts fall back to score.
            bestMatch = {"ID": None, "Matches": 0, "Score": 0.0}
            for artistDBID, ma in dbMatches.items():
                if ma.near == 0:
                    continue
                moreMatches = ma.near > bestMatch["Matches"]
                betterScore = (ma.near == bestMatch["Matches"]
                               and ma.score > bestMatch["Score"])
                if moreMatches or betterScore:
                    bestMatch = {
                        "ID": artistDBID,
                        "Matches": ma.near,
                        "Score": ma.score
                    }

            if bestMatch["ID"] is not None:
                cnts += 1
                print("mdb.add(\"{0}\", \"{1}\", \"{2}\")".format(
                    dbartist, db, bestMatch["ID"]))
Ejemplo n.º 12
0
 def getMatchedPrimeAlbumDirs(self, primeDir, matchedDirs):
     """Return ``(artistName, directory)`` pairs for ``primeDir`` in each matched dir.

     The directories come from ``findDirs`` on the ``primeDir``
     sub-directory of every entry in ``matchedDirs``, flattened with
     ``getFlatList``. Each name is the last element of
     ``getDirBasics(directory)``, normalized to NFC.
     """
     dirListings = []
     for matchedDir in matchedDirs:
         dirListings.append(findDirs(setDir(matchedDir, primeDir)))
     dirvals = getFlatList(dirListings)

     artistNames = []
     for dirval in dirvals:
         baseName = getDirBasics(dirval)[-1]
         artistNames.append(normalize('NFC', baseName))

     return [(name, dirval) for name, dirval in zip(artistNames, dirvals)]
Ejemplo n.º 13
0
 def getArtistAlbumTypesByArtist(self, artistName, albumType):
     """Return the flattened albums of ``albumType`` for ``artistName``.

     Looks up ``self.getArtistAlbumsByArtist(artistName)``. When it has no
     truthy entry for ``albumType`` the result is ``None``; otherwise the
     values of that entry's ``albums`` mapping are flattened with
     ``getFlatList``.
     """
     albumsByType = self.getArtistAlbumsByArtist(artistName)
     if not albumsByType.get(albumType):
         return None
     typeAlbums = albumsByType[albumType].albums.values()
     return getFlatList(typeAlbums)