Example #1
0
    def parseSearch(self, modVal, expr=None, force=False, debug=False, quiet=False):
        """Add search-only Discogs artists for one modVal to that modVal's DB.

        Loads the existing DB for ``modVal`` (or starts an empty Series when the
        DB file does not exist), loads the artist search table whose basename is
        ``artistData-<modVal>``, and builds an entry for every row of that table
        whose ID is not already present in the DB. The DB is saved only when at
        least one entry was added.

        Args:
            modVal: Value selecting the DB file and the search-data file.
            expr: Forwarded to ``self.getArtistRawFiles``.
            force, debug, quiet: Only echoed in the timing banner here.

        Raises:
            ValueError: If there is not exactly one matching search-data file.
        """
        ts = timestat("Parsing Discogs Search ModVal={0} Files(expr=\'{1}\', force={2}, debug={3}, quiet={4})".format(modVal, expr, force, debug, quiet))

        io = fileIO()

        # Previous DB data
        if not fileUtil(self.disc.getDBModValFilename(modVal)).exists:
            tsDB = timestat("Creating New DB For ModVal={0}".format(modVal))
            dbdata = Series({})
        else:
            tsDB = timestat("Loading ModVal={0} DB Data".format(modVal))
            dbdata = self.disc.getDBModValData(modVal)
        # Stop the DB timer in both branches; the overall timer keeps running.
        tsDB.stop()

        # Previous media data
        previousMetadata = self.disc.getMetadataAlbumData(modVal)

        # Artist search data (no media)
        tsDB = timestat("Loading Artist Search Data For ModVal={0}".format(modVal))
        artistSearchFilenames = self.getArtistRawFiles(datatype="search", expr=expr, force=True)
        artistSearchFilename = [x for x in artistSearchFilenames if fileUtil(x).basename == "artistData-{0}".format(modVal)]
        if len(artistSearchFilename) == 1:
            artistSearchData = io.get(artistSearchFilename[0])
        else:
            raise ValueError("Could not find Discogs API Artist Search Data")
        tsDB.stop()

        N = artistSearchData.shape[0]
        modValue = 5000 if N >= 50000 else 1000
        tsParse = timestat("Parsing {0} Searched For Discogs API Artists".format(N))
        # Collect new entries in a dict and merge once after the loop; growing
        # the Series one element at a time copies it on every iteration.
        newEntries = {}
        for i, (artistID, artistData) in enumerate(artistSearchData.iterrows()):
            if (i+1) % modValue == 0 or (i+1) == N:
                tsParse.update(n=i+1, N=N)
            if dbdata.get(artistID) is not None or artistID in newEntries:
                continue
            artistAPIData = {"Artist": artistData, "Albums": previousMetadata.get(artistID, {})}
            newEntries[artistID] = self.artist.getData(artistAPIData)
        tsParse.stop()

        Nnew = len(newEntries)
        if Nnew > 0:
            from pandas import concat
            additions = Series(newEntries)
            dbdata = additions if len(dbdata) == 0 else concat([dbdata, additions])
            print("Saving [{0}/{1}] {2} Entries To {3}".format(Nnew, len(dbdata), "ID Data", self.disc.getDBModValFilename(modVal)))
            self.disc.saveDBModValData(modVal=modVal, idata=dbdata)
        else:
            print("Not saving any of the new data")

        ts.stop()
Example #2
0
 def parse(self, expr, force=False, debug=False, quiet=False):
     """Parse raw pickled HTML files and save one result file per artist.

     Each file returned by ``self.getArtistRawFiles(datatype="data", ...)`` is
     passed to ``self.artist.getData``. A result is saved under the name from
     ``self.dutils.getArtistSavename`` when that name is a string and either
     ``force`` is set or the target does not exist yet. Files with no parsed
     data or no artist ID are skipped (reported only when ``debug`` is set).

     Args:
         expr: Forwarded to ``self.getArtistRawFiles``.
         force: Forwarded to the file lookup; also allows overwriting.
         debug: Print skip reasons and report progress on every file.
         quiet: Only echoed in the timing banner here.
     """
     banner = "Parsing Raw Pickled HTML Files(expr=\'{0}\', force={1}, debug={2}, quiet={3})"
     ts = timestat(banner.format(expr, force, debug, quiet))

     io = fileIO()
     newFiles = self.getArtistRawFiles(datatype="data", expr=expr, force=force)

     N = len(newFiles)
     if N >= 500:
         modValue = 250
     else:
         modValue = 50
     nSave = 0
     tsParse = timestat("Parsing {0} Raw Picked HTML Files".format(N))
     for count, rawFile in enumerate(newFiles, start=1):
         # Progress is reported periodically, on the last file, or always in debug.
         if count % modValue == 0 or count == N or debug:
             tsParse.update(n=count, N=N)

         parsed = self.artist.getData(rawFile)
         if parsed is None:
             if debug:
                 print("Could not find data for {0}".format(rawFile))
             continue

         artistID = parsed.ID.ID
         if artistID is None:
             if debug:
                 print("Could not find artistID for {0}".format(rawFile))
             continue

         target = self.dutils.getArtistSavename(artistID)
         if not isinstance(target, str):
             continue
         if force == True or fileUtil(target).exists == False:
             io.save(idata=parsed, ifile=target)
             nSave += 1

     ts.stop()
     print("Saved {0} New Files".format(nSave))
Example #3
0
    def parse(self, modVal, expr, force=False, debug=False, quiet=False):
        """Parse primary artist files for one modVal and merge them into its DB.

        Each file from ``self.getArtistPrimaryFiles`` is parsed with
        ``self.artist.getData``; the result is stored under the file's base name
        when that matches the parsed ID. The DB is saved only when at least one
        entry was stored.

        Args:
            modVal: Value selecting the DB file to update.
            expr: Forwarded to ``self.getArtistPrimaryFiles``.
            force: Forwarded to the file lookup; when True the DB is rebuilt
                from scratch instead of loaded.
            debug: Print per-file details and abort on an ID mismatch.
            quiet: Only echoed in the timing banner here.

        Returns:
            bool: True if any entry was stored, False otherwise (including when
            there were no files to parse).

        Raises:
            ValueError: In debug mode, when a parsed ID differs from the file's
                base name.
        """
        ts = timestat("Parsing Primary ModVal={0} Files(expr=\'{1}\', force={2}, debug={3}, quiet={4})".format(modVal, expr, force, debug, quiet))

        tsFiles  = timestat("Finding Files To Parse")
        newFiles = self.getArtistPrimaryFiles(modVal, expr, force)
        tsFiles.stop()

        N = len(newFiles)
        if N == 0:
            ts.stop()
            return False

        # Progress interval: roughly a tenth of N, in multiples of 250, at least 250.
        modValue = max([250 * round((N/10)/250), 250])

        if force is True or not fileUtil(self.disc.getDBModValFilename(modVal)).exists:
            tsDB = timestat("Creating New DB For ModVal={0}".format(modVal))
            dbdata = {}
        else:
            tsDB = timestat("Loading ModVal={0} DB Data".format(modVal))
            dbdata = self.disc.getDBModValData(modVal)
        # Stop the DB timer in both branches; the overall timer is stopped at the end.
        tsDB.stop()

        newData  = 0
        tsParse = timestat("Parsing {0} New Files For ModVal={1}".format(N, modVal))
        for i,ifile in enumerate(newFiles):
            if (i+1) % modValue == 0 or (i+1) == N:
                tsParse.update(n=i+1, N=N)

            artistID = getBaseFilename(ifile)
            info     = self.artist.getData(ifile)
            if info is None:
                # Nothing could be parsed from this file; skip it.
                if debug:
                    print("Could not find data for {0}".format(ifile))
                continue
            if debug:
                print("\t",ifile,' ==> ',info.ID.ID,' <-> ',artistID)
            if info.ID.ID != artistID:
                if debug is True:
                    # Fail loudly in debug mode instead of dividing by zero.
                    raise ValueError("Error for {0}  ID={1}  FileID={2}".format(info.meta.title,info.ID.ID,artistID))
                continue
            dbdata[artistID] = info
            newData += 1
        tsParse.stop()

        if newData > 0:
            dbdata = Series(dbdata)
            print("Saving [{0}/{1}] {2} Entries To {3}".format(newData, len(dbdata), "ID Data", self.disc.getDBModValFilename(modVal)))
            self.disc.saveDBModValData(modVal=modVal, idata=dbdata)

        ts.stop()

        return newData > 0
Example #4
0
 def parse(self, modVal, expr, force=False, debug=False, quiet=False):
     """Parse raw pickled Spotify API files for one modVal into its DB.

     For each file from ``self.getArtistPrimaryFiles`` the artist row is looked
     up in the search table by the file's ``'artistID'`` value, combined with
     the file content as ``{"Artist": ..., "Albums": ...}`` and passed to
     ``self.artist.getData``. Parsed entries replace any existing DB entry with
     the same ID. The DB is saved only when at least one file was parsed.

     Args:
         modVal: Value selecting the DB file to update.
         expr: Forwarded to the file lookups.
         force: Forwarded to the primary-file lookup; when True the DB is
             rebuilt from scratch instead of loaded.
         debug, quiet: Only echoed in the timing banner here.

     Raises:
         ValueError: If there is not exactly one artist search-data file.
     """
     ts = timestat("Parsing Raw Pickled Spotify API Primary ModVal={0} Files(expr=\'{1}\', force={2}, debug={3}, quiet={4})".format(modVal, expr, force, debug, quiet))

     io = fileIO()
     newFiles = self.getArtistPrimaryFiles(modVal, expr, force)
     print("Found {0} New Files".format(len(newFiles)))
     if len(newFiles) == 0:
         ts.stop()
         return

     artistSearchFilename = self.getArtistRawFiles(datatype="search", expr=expr, force=True)
     if len(artistSearchFilename) == 1:
         artistSearchData = io.get(artistSearchFilename[0])
     else:
         raise ValueError("Could not find Spotify API Artist Search Data")

     if force is True or not fileUtil(self.disc.getDBModValFilename(modVal)).exists:
         tsDB = timestat("Creating New DB For ModVal={0}".format(modVal))
         dbdata = Series({})
     else:
         tsDB = timestat("Loading ModVal={0} DB Data".format(modVal))
         dbdata = self.disc.getDBModValData(modVal)
     # Stop the DB timer in both branches; the overall timer keeps running.
     tsDB.stop()

     N = len(newFiles)
     modValue = 500 if N >= 5000 else 100
     nSave = 0
     tsParse = timestat("Parsing {0} Raw Picked API Files".format(N))
     # Collect parsed entries and merge once after the loop; growing the Series
     # one element at a time copies it on every iteration.
     newEntries = {}
     for i,ifile in enumerate(newFiles):
         if (i+1) % modValue == 0 or (i+1) == N:
             tsParse.update(n=i+1, N=N)
         dData = io.get(ifile)
         artistID = dData['artistID']
         try:
             artistData = artistSearchData.loc[artistID]
         except KeyError:
             print("Could not find Spotify ID [{0}]".format(artistID))
             continue

         artistAPIData = {"Artist": artistData, "Albums": dData}
         newEntries[artistID] = self.artist.getData(artistAPIData)
         nSave += 1
     tsParse.stop()

     if nSave > 0:
         from pandas import concat
         additions = Series(newEntries)
         # Drop stale entries for re-parsed IDs so the index stays unique.
         kept = dbdata[~dbdata.index.isin(additions.index)]
         dbdata = additions if len(kept) == 0 else concat([kept, additions])
         print("Saving [{0}/{1}] {2} Entries To {3}".format(nSave, len(dbdata), "ID Data", self.disc.getDBModValFilename(modVal)))
         self.disc.saveDBModValData(modVal=modVal, idata=dbdata)
     else:
         print("Not saving any of the new data")

     ts.stop()
Example #5
0
 def parseSearch(self, modVal, expr=None, force=False, debug=False, quiet=False):
     """Add search-only Spotify artists for one modVal to that modVal's DB.

     Loads the existing DB for ``modVal`` (or starts an empty Series when the
     DB file does not exist), selects the rows of the artist search table whose
     ``'sid'`` maps to ``modVal`` via ``artistModValue().getModVal``, and builds
     an entry (with empty ``"Albums"``) for every selected artist not already
     in the DB. The DB is saved only when at least one entry was added.

     Args:
         modVal: Value selecting the DB file and the subset of artists.
         expr: Forwarded to ``self.getArtistRawFiles``.
         force, debug, quiet: Only echoed in the timing banner here.

     Raises:
         ValueError: If there is not exactly one artist search-data file.
     """
     ts = timestat("Parsing Spotify Search ModVal={0} Files(expr=\'{1}\', force={2}, debug={3}, quiet={4})".format(modVal, expr, force, debug, quiet))

     if not fileUtil(self.disc.getDBModValFilename(modVal)).exists:
         tsDB = timestat("Creating New DB For ModVal={0}".format(modVal))
         dbdata = Series({})
     else:
         tsDB = timestat("Loading ModVal={0} DB Data".format(modVal))
         dbdata = self.disc.getDBModValData(modVal)
     # Stop the DB timer in both branches; the overall timer keeps running.
     tsDB.stop()

     io = fileIO()
     artistSearchFilename = self.getArtistRawFiles(datatype="search", expr=expr, force=True)
     if len(artistSearchFilename) == 1:
         artistSearchData = io.get(artistSearchFilename[0])
     else:
         raise ValueError("Could not find Spotify API Artist Search Data")

     amv = artistModValue()
     # The mask is computed on the reset index, then relabelled so it aligns
     # with artistSearchData's own index for boolean selection.
     idx = artistSearchData.reset_index()['sid'].apply(amv.getModVal) == modVal
     idx.index = artistSearchData.index
     artists = artistSearchData[idx]
     N = artists.shape[0]

     tsParse = timestat("Parsing {0} Searched For Spotify API Artists".format(N))
     # Collect new entries and merge once after the loop; growing the Series
     # one element at a time copies it on every iteration.
     newEntries = {}
     for artistID,artistData in artists.iterrows():
         if dbdata.get(artistID) is not None or artistID in newEntries:
             continue
         artistAPIData = {"Artist": artistData, "Albums": {}}
         newEntries[artistID] = self.artist.getData(artistAPIData)
     tsParse.stop()

     Nnew = len(newEntries)
     if Nnew > 0:
         from pandas import concat
         additions = Series(newEntries)
         dbdata = additions if len(dbdata) == 0 else concat([dbdata, additions])
         print("Saving [{0}/{1}] {2} Entries To {3}".format(Nnew, len(dbdata), "ID Data", self.disc.getDBModValFilename(modVal)))
         self.disc.saveDBModValData(modVal=modVal, idata=dbdata)
     else:
         print("Not saving any of the new data")

     ts.stop()
Example #6
0
    def getDataBase(self, inputdata):
        """Load artist input into ``self.bsdata`` or ``self.dbdata``.

        A dict is stored directly as ``self.bsdata``. Anything else is treated
        as a file path: the file is read with ``fileIO().get`` and dispatched on
        the type of its content:

        * ``artistDBDataClass`` -> stored as ``self.dbdata``
        * ``str`` / ``bytes``   -> passed through ``getHTML`` into ``self.bsdata``
        * BS4 object or dict    -> stored as ``self.bsdata``

        Args:
            inputdata: A dict, or a path to an existing file.

        Raises:
            ValueError: If ``inputdata`` is neither a dict nor an existing file,
                if the file content has an unsupported type, or if ``getHTML``
                fails on str/bytes content (the original error is chained).
        """
        if isinstance(inputdata, dict):
            self.bsdata = inputdata
            return

        if not fileUtil(inputdata).isFile():  ## Assumes str I believe
            raise ValueError("Not sure about input type: {0}".format(
                type(inputdata)))

        ioData = fileIO().get(inputdata)
        if isinstance(ioData, artistDBDataClass):
            self.dbdata = ioData
        elif isinstance(ioData, (str, bytes)):
            kind = "str" if isinstance(ioData, str) else "bytes"
            try:
                self.bsdata = getHTML(ioData)
            except Exception as err:
                # Keep the original cause attached instead of hiding it.
                raise ValueError(
                    "Cannot read artist [{0}] file: {1}".format(
                        kind, inputdata)) from err
        elif isBS4(ioData) or isinstance(ioData, dict):
            self.bsdata = ioData
        else:
            raise ValueError("Not sure about file data type: {0}".format(
                type(ioData)))
Example #7
0
 def parse(self, expr, force=False, debug=False, quiet=False):
     """Parse raw HTML files and save one result file per artist.

     Each file returned by ``self.getArtistRawHTMLFiles`` is passed to
     ``self.artist.getData``. A result is saved under the name from
     ``self.dutils.getArtistSavename`` when that name is a string and either
     ``force`` is set or the target does not exist yet. Files with no parsed
     data or no artist ID are skipped (reported only when ``debug`` is set).

     Args:
         expr: Forwarded to ``self.getArtistRawHTMLFiles``.
         force: Forwarded to the file lookup; also allows overwriting.
         debug: Print the reason when a file is skipped.
         quiet: Only echoed in the timing banner here.
     """
     ts = timestat("Parsing Raw HTML Files(expr=\'{0}\', force={1}, debug={2}, quiet={3})".format(expr, force, debug, quiet))

     io = fileIO()
     newFiles = self.getArtistRawHTMLFiles(expr, force=force)

     N = len(newFiles)
     modValue = 250 if N >= 500 else 50
     modValue = 500 if N >= 2000 else modValue
     nSave = 0
     tsParse = timestat("Parsing {0} Raw HTML Files".format(N))
     for i,ifile in enumerate(newFiles):
         if (i+1) % modValue == 0 or (i+1) == N:
             tsParse.update(n=i+1, N=N)
         # getData receives the filename itself, so the file is not read here.
         retval = self.artist.getData(ifile)
         if retval is None:
             if debug:
                 print("Could not find data for {0}".format(ifile))
             continue
         artistID = retval.ID.ID
         if artistID is None:
             if debug:
                 print("Could not find artistID for {0}".format(ifile))
             continue
         savename = self.dutils.getArtistSavename(artistID)
         if isinstance(savename,str) and (force == True or fileUtil(savename).exists == False):
             io.save(idata=retval, ifile=savename)
             nSave += 1
     tsParse.stop()

     ts.stop()
     print("Saved {0} New Files".format(nSave))
Example #8
0
    def __init__(self):
        """Set up the ignore-data paths and a file I/O helper.

        ``ignoreDirname`` is "dbdata" joined onto the module-level ``prefix``;
        ``ignoreFilename`` is "dbIgnoreData.yaml" joined onto that directory.
        """
        self.ignoreDirname = dirUtil(prefix).join("dbdata")
        ignoreBase = fileUtil(self.ignoreDirname)
        self.ignoreFilename = ignoreBase.join("dbIgnoreData.yaml")

        self.io = fileIO()
Example #9
0
 def parse(self, modVal, expr, force=False, debug=False, quiet=False):
     """Parse raw pickled Discogs API files for one modVal into its DB.

     For each file from ``self.getArtistPrimaryFiles`` the artist row is looked
     up in the search table by the file's base name, combined with the file
     content as ``{"Artist": ..., "Albums": ...}`` and passed to
     ``self.artist.getData``. Parsed entries replace any existing DB entry with
     the same ID. The DB is saved only when at least one file was parsed.

     Args:
         modVal: Value selecting the DB file and the search-data file.
         expr: Forwarded to the file lookups.
         force: When True the DB is rebuilt from scratch instead of loaded.
         debug, quiet: Only echoed in the timing banner here.

     Raises:
         ValueError: If there is not exactly one matching search-data file.
     """
     ts = timestat("Parsing Raw Pickled Discogs API Primary ModVal={0} Files(expr=\'{1}\', force={2}, debug={3}, quiet={4})".format(modVal, expr, force, debug, quiet))

     io = fileIO()

     # New files
     tsDB = timestat("Finding New Files For ModVal={0}".format(modVal))
     newFiles = self.getArtistPrimaryFiles(modVal, expr, force=True)
     print("Found {0} New Files".format(len(newFiles)))
     tsDB.stop()
     if len(newFiles) == 0:
         ts.stop()
         return

     # Artist search data (no media)
     tsDB = timestat("Loading Artist Search Data For ModVal={0}".format(modVal))
     artistSearchFilenames = self.getArtistRawFiles(datatype="search", expr=expr, force=True)
     artistSearchFilename = [x for x in artistSearchFilenames if fileUtil(x).basename == "artistData-{0}".format(modVal)]
     if len(artistSearchFilename) == 1:
         artistSearchData = io.get(artistSearchFilename[0])
     else:
         raise ValueError("Could not find Discogs API Artist Search Data")
     tsDB.stop()

     # Previous media data (loaded as in the original; not used further below)
     tsDB = timestat("Loading Media Metadata For ModVal={0}".format(modVal))
     previousMetadata = self.disc.getMetadataAlbumData(modVal)
     tsDB.stop()

     # Previous DB data
     if force is True or not fileUtil(self.disc.getDBModValFilename(modVal)).exists:
         tsDB = timestat("Creating New DB For ModVal={0}".format(modVal))
         dbdata = Series({})
     else:
         tsDB = timestat("Loading ModVal={0} DB Data".format(modVal))
         dbdata = self.disc.getDBModValData(modVal)
     # Stop the DB timer in both branches; the overall timer keeps running.
     tsDB.stop()

     N = len(newFiles)
     modValue = 2500 if N >= 5000 else 250
     nSave = 0
     tsParse = timestat("Parsing {0} Raw Picked API Files".format(N))
     # Collect parsed entries and merge once after the loop; growing the Series
     # one element at a time copies it on every iteration.
     newEntries = {}
     for i,ifile in enumerate(newFiles):
         if (i+1) % modValue == 0 or (i+1) == N:
             tsParse.update(n=i+1, N=N)
         dData = io.get(ifile)
         artistID = fileUtil(ifile).basename
         try:
             artistData = artistSearchData.loc[artistID]
         except KeyError:
             print("Could not find Discogs ID [{0}]".format(artistID))
             continue

         artistAPIData = {"Artist": artistData, "Albums": dData}
         # Store the parsed object itself, not a one-element Series around it.
         newEntries[artistID] = self.artist.getData(artistAPIData)
         nSave += 1
     tsParse.stop()

     if nSave > 0:
         from pandas import concat
         additions = Series(newEntries)
         # Drop stale entries for re-parsed IDs so the index stays unique.
         kept = dbdata[~dbdata.index.isin(additions.index)]
         dbdata = additions if len(kept) == 0 else concat([kept, additions])
         print("Saving [{0}/{1}] {2} Entries To {3}".format(nSave, len(dbdata), "ID Data", self.disc.getDBModValFilename(modVal)))
         self.disc.saveDBModValData(modVal=modVal, idata=dbdata)
     else:
         print("Not saving any of the new data")

     ts.stop()