def parseAndDownloadTeamYearlyStandings(self):
    """Download team data for every team linked from each saved season page.

    Every ``.p`` file found under ``self.getSeasonDir()`` is loaded with
    ``getFile`` and parsed with ``getHTML``.  Anchors carrying a
    ``data-clubhouse-uid`` attribute are collected: ``getBasename(href)`` is
    used as the team name and ``getBasename(getDirname(href))`` as the team
    ID.  Each collected pair is passed to ``self.downloadTeamDataByYear``
    with the file's base name as the season.

    Raises:
        ValueError: if one file maps the same team ID to two different names.
    """
    for seasonFile in findExt(self.getSeasonDir(), ext=".p", debug=False):
        year = getBaseFilename(seasonFile)
        bsdata = getHTML(getFile(seasonFile))

        # team ID -> team name, rebuilt for every season file
        teamNames = {}
        for link in bsdata.findAll("a"):
            attrs = link.attrs
            if attrs.get("data-clubhouse-uid") is None:
                continue
            href = attrs['href']
            name = getBasename(href)
            teamID = getBasename(getDirname(href))
            known = teamNames.get(teamID)
            if known is not None and known != name:
                raise ValueError("Error in ID for this year!")
            teamNames[teamID] = name

        for teamID, name in teamNames.items():
            self.downloadTeamDataByYear(teamID, name, season=str(year), debug=True)
def processWikiFilmYearlyData(self, procYear=None, debug=False):
    """Parse the saved WikiFilm yearly files and save the sorted results.

    Args:
        procYear: when given, only files matching ``str(procYear)`` are
            processed (via ``findPatternExt``); otherwise every ``.p`` file
            in ``self.getDataDir()`` is used.
        debug: print each file name as it is processed.

    For each file, ``self.parseWikiFilmYearlyData`` must return a mapping;
    its items are sorted ascending by value and stored under the file's base
    name.  The combined dict is written to ``<results dir>/<self.name>.json``.
    """
    outdir = self.getDataDir()
    # Identity test: `== None` can be hijacked by a custom __eq__.
    if procYear is None:
        files = findExt(outdir, ext=".p")
    else:
        files = findPatternExt(outdir, pattern=str(procYear), ext=".p")

    yearlyData = {}
    for ifile in sorted(files):
        if debug:
            print("Processing {0}".format(ifile))
        year = getBaseFilename(ifile)
        movies = self.parseWikiFilmYearlyData(ifile, debug=False)
        # Ascending sort on the value of each (film, value) pair.
        yearlyData[year] = sorted(movies.items(), key=operator.itemgetter(1), reverse=False)
        print("---->", year, " (Top 5/{0} Movies) <----".format(len(yearlyData[year])))
        for item in yearlyData[year][:5]:
            print(item)
        print('\n')

    savename = setFile(self.getResultsDir(), "{0}.json".format(self.name))
    print("Saving {0} WikiFilm data to {1}".format(len(yearlyData), savename))
    saveFile(savename, yearlyData)
def processAACTACategoryData(self, debug=False):
    """Merge all parsed AACTA category files into one per-year movie list.

    Each file returned by ``findExt(self.getDataDir(), ext="*.p")`` is parsed
    with ``self.parseAACTACategoryData`` using the file's base name as the
    category.  The result is read as ``{year: {category: [movie, ...]}}``;
    all movies of a year are pooled, de-duplicated through a ``set`` and
    stored as ``[movie, 10]`` pairs.  The output is written to
    ``<results dir>/<self.name>.json``.

    Raises:
        ValueError: if a file yields no results.
    """
    from collections import OrderedDict

    files = findExt(self.getDataDir(), ext="*.p")
    movies = OrderedDict()
    print(files)

    for ifile in files:
        if debug:
            print("Processing {0}".format(ifile))
        fileCategory = getBaseFilename(ifile)
        results = self.parseAACTACategoryData(ifile, fileCategory, debug=debug)
        if len(results) == 0:
            raise ValueError("No results for {0}".format(ifile))
        for year, yearData in results.items():
            for categoryData in yearData.values():
                # Created lazily so a year without categories adds no entry.
                if movies.get(year) is None:
                    movies[year] = []
                movies[year].extend(categoryData)

    # De-duplicate each year and attach the constant weight 10 to every movie.
    for year in movies.keys():
        movies[year] = [[movie, 10] for movie in set(movies[year])]

    savename = setFile(self.getResultsDir(), "{0}.json".format(self.name))
    print("Saving {0} Years of AACTA Data to {1}".format(len(movies), savename))
    saveFile(savename, movies)
def createRawOscarData(self, debug=True):
    """Combine the yearly oscar JSON files, apply saved corrections, save raw.yaml.

    Every ``.json`` file in ``self.wikiData.getResultsDir()`` is loaded and
    keyed by its base name.  If ``saved.yaml`` exists in the corrections
    directory, each ``Winner`` / ``Nominees`` entry it holds replaces the
    corresponding value in the loaded data.  The result is written to
    ``raw.yaml`` in the corrections directory.
    """
    print("Checking for poorly parsed oscar data.")
    files = sorted(findExt(self.wikiData.getResultsDir(), ext=".json"))
    if debug:
        print("Found {0} oscar files".format(len(files)))

    yearlyData = {getBaseFilename(ifile): getFile(ifile) for ifile in files}

    correctionsFile = setFile(self.getCorrectionsDir(), "saved.yaml")
    savedData = getFile(correctionsFile) if isFile(correctionsFile) else {}

    for year, titles in savedData.items():
        for title, saved in titles.items():
            savedWinner = saved.get("Winner")
            savedNominees = saved.get("Nominees")
            if savedWinner is not None:
                print("Overwritting {0} {1} winner".format(year, title))
                yearlyData[year][title]["Winner"] = savedWinner
            if savedNominees is not None:
                print("Overwritting {0} {1} nominees".format(year, title))
                yearlyData[year][title]["Nominees"] = savedNominees

    rawFile = setFile(self.getCorrectionsDir(), "raw.yaml")
    saveFile(idata=yearlyData, ifile=rawFile)
def getMyBoxSetMusic(self, dirval):
    """Collect box-set album names found below ``dirval``.

    Each entry of ``self.directoryMapping["BoxSet"]`` is joined to ``dirval``
    and expanded with ``glob``.  For every match, the base names of
    everything ``findAll`` returns are appended to the ``albums`` attribute
    of a new ``myArtistAlbumData`` and ``self.updateFileCount`` is called for
    that directory.

    Returns:
        The populated ``myArtistAlbumData`` instance.
    """
    musicdata = myArtistAlbumData()
    patterns = (join(dirval, dval) for dval in self.directoryMapping["BoxSet"])
    for pattern in patterns:
        for dname in glob(pattern):
            names = [getBaseFilename(x) for x in findAll(dname)]
            musicdata.albums += names
            self.updateFileCount(musicdata, dname)
    return musicdata
def getURL(self):
    """Build the kworb iTunes artist URL for ``self.inputdata``.

    Returns:
        ``artistDBURLClass(url=None, err="NoInput")`` when ``self.inputdata``
        is ``None``; otherwise an ``artistDBURLClass`` whose URL embeds the
        base file name of ``self.inputdata``.
    """
    if self.inputdata is None:
        return artistDBURLClass(url=None, err="NoInput")

    artistURL = "https://kworb.net/itunes/artist/{0}.html".format(
        getBaseFilename(self.inputdata))
    return artistDBURLClass(url=artistURL)
def parseUltimateMovieRankingsYearlyData(self, procYear=None, debug=False):
    """Extract film rankings from the saved Ultimate Movie Rankings pages.

    Args:
        procYear: when given, only files matching ``str(procYear)`` are
            processed; otherwise every ``.p`` file in ``self.getDataDir()``.
        debug: accepted for interface compatibility; not used here.

    Every table row with exactly 11 ``<td>`` cells is treated as a film row.
    The title comes from the second cell (after ``removeTag(..., 'span')``)
    with the " (<year>)" suffix removed.  The rank is the last cell parsed
    as a float, falling back to the second-to-last cell.  Films are sorted
    by rank, descending, per year and saved to
    ``<results dir>/<self.name>.json``.

    Raises:
        ValueError: if neither of the last two cells of a film row is numeric.
    """
    outdir = self.getDataDir()
    if procYear is None:
        files = findExt(outdir, ext=".p")
    else:
        files = findPatternExt(outdir, pattern=str(procYear), ext=".p")

    movieData = {}
    for ifile in sorted(files):
        bsdata = getHTML(getFile(ifile))
        year = getBaseFilename(ifile)

        movies = {}
        for table in bsdata.findAll("table"):
            for tr in table.findAll("tr"):
                tds = tr.findAll("td")
                if len(tds) != 11:
                    continue
                film = removeTag(tds[1], 'span').text
                film = film.replace(" ({0})".format(year), "")
                try:
                    rank = float(tds[-1].text)
                except ValueError:
                    # Fall back to the second-to-last cell when the last one is not numeric.
                    try:
                        rank = float(tds[-2].text)
                    except ValueError as err:
                        raise ValueError(tds[-1], tds[-2], tr) from err
                movies[film] = rank
        movieData[year] = movies

    yearlyData = {}
    for year in sorted(movieData.keys()):
        yearlyData[year] = sorted(movieData[year].items(), key=operator.itemgetter(1), reverse=True)
        print("---->", year, " (Top 5/{0} Movies) <----".format(len(yearlyData[year])))
        for item in yearlyData[year][:5]:
            print(item)
        print('\n')

    savename = setFile(self.getResultsDir(), "{0}.json".format(self.name))
    print("Saving {0} Years of Ultimate Movie Rankings data to {1}".format(
        len(yearlyData), savename))
    saveFile(savename, yearlyData)
def parse(self, modVal, expr, force=False, debug=False):
    """Merge newly found "unofficial" artist files into the ModVal database.

    Args:
        modVal: bucket identifier passed through to the file/DB helpers.
        expr: file-selection expression passed to
            ``self.getArtistUnofficialFiles``.
        force: passed to the file finder and to ``self.getDBData``.
        debug: accepted for interface compatibility; not used here.

    For every new file the artist record is loaded with
    ``self.artist.getData``.  Unknown artists are inserted as-is.  For known
    artists each media list in the new record is merged into the stored one,
    keyed by each entry's ``code``, with new entries overriding stored ones.
    The database is saved through ``self.saveDBData`` only when at least one
    record was touched.
    """
    ts = timestat("Parsing ModVal={0} Unofficial Files".format(modVal))

    tsFiles = timestat("Finding Files To Parse")
    newFiles = self.getArtistUnofficialFiles(modVal, expr, force)
    tsFiles.stop()

    N = len(newFiles)
    modValue = 50 if N >= 100 else 10   # progress-print interval

    # The DB is loaded only when there is something to merge; with N == 0
    # the loop below never touches it.
    dbdata = None
    if N > 0:
        tsDB = timestat("Loading ModVal={0} DB Data".format(modVal))
        dbdata = self.getDBData(modVal, force)
        tsDB.stop()

    newData = 0
    tsParse = timestat(
        "Parsing {0} New Unofficial Files For ModVal={1}".format(N, modVal))
    for i, ifile in enumerate(newFiles):
        if (i + 1) % modValue == 0 or (i + 1) == N:
            print("{0: <15}Parsing {1}".format("{0}/{1}".format(i + 1, N), ifile))

        artistID = getBaseFilename(ifile)
        info = self.artist.getData(ifile)

        known = dbdata.get(artistID)
        if known is None:
            dbdata[artistID] = info
            newData += 1
            continue

        # Only media types present in the new record can change anything, so
        # iterate those directly instead of the union with the stored keys.
        knownMedia = known.media.media
        for k, v in info.media.media.items():
            if v is None:
                continue
            merged = {}
            dVal = knownMedia.get(k)
            if dVal is not None:
                merged = {v2.code: v2 for v2 in dVal}
            merged.update({v2.code: v2 for v2 in v})   # new entries win
            knownMedia[k] = list(merged.values())
        newData += 1
    tsParse.stop()

    print("Found {0} Unofficial Artist Records For ModVal={1}".format(
        newData, modVal))
    if newData > 0:
        self.saveDBData(modVal, dbdata, newData)

    # The overall timer was previously started but never stopped.
    ts.stop()
def parse(self, modVal, expr, force=False, debug=False, quiet=False):
    """Parse new primary artist files into the ModVal database.

    Args:
        modVal: bucket identifier passed through to the file/DB helpers.
        expr: file-selection expression passed to
            ``self.getArtistPrimaryFiles``.
        force: start from an empty DB instead of loading the stored one.
        debug: print per-file details; with ``debug=True`` an ID mismatch
            aborts via a deliberate ``ZeroDivisionError``.
        quiet: only echoed in the timer label.

    Returns:
        ``None`` when there are no files to parse; otherwise ``True`` if at
        least one record was stored (and the DB saved), else ``False``.
    """
    ts = timestat("Parsing Primary ModVal={0} Files(expr=\'{1}\', force={2}, debug={3}, quiet={4})".format(modVal, expr, force, debug, quiet))

    tsFiles = timestat("Finding Files To Parse")
    newFiles = self.getArtistPrimaryFiles(modVal, expr, force)
    tsFiles.stop()

    N = len(newFiles)
    if N == 0:
        ts.stop()
        return

    # Progress interval: about a tenth of the files, rounded to a multiple of 250, at least 250.
    modValue = max([250 * round((N/10)/250), 250])

    if force is True or not fileUtil(self.disc.getDBModValFilename(modVal)).exists:
        tsDB = timestat("Creating New DB For ModVal={0}".format(modVal))
        dbdata = {}
    else:
        tsDB = timestat("Loading ModVal={0} DB Data".format(modVal))
        dbdata = self.disc.getDBModValData(modVal)
    # Fix: the "creating" branch used to stop the overall timer `ts` here,
    # leaving `tsDB` running and stopping `ts` twice.
    tsDB.stop()

    newData = 0
    tsParse = timestat("Parsing {0} New Files For ModVal={1}".format(N, modVal))
    for i, ifile in enumerate(newFiles):
        if (i+1) % modValue == 0 or (i+1) == N:
            tsParse.update(n=i+1, N=N)

        artistID = getBaseFilename(ifile)
        info = self.artist.getData(ifile)
        if debug:
            print("\t", ifile, ' ==> ', info.ID.ID, ' <-> ', artistID)

        if info.ID.ID != artistID:
            if debug is True:
                print("Error for {0} ID={1} FileID={2}".format(info.meta.title, info.ID.ID, artistID))
                1/0  # deliberate hard stop in debug mode (raises ZeroDivisionError)
            continue

        dbdata[artistID] = info
        newData += 1
    tsParse.stop()

    if newData > 0:
        dbdata = Series(dbdata)
        print("Saving [{0}/{1}] {2} Entries To {3}".format(newData, len(dbdata), "ID Data", self.disc.getDBModValFilename(modVal)))
        self.disc.saveDBModValData(modVal=modVal, idata=dbdata)

    ts.stop()
    return newData > 0
def parseFilms101Data(self, debug=False):
    """Parse every saved films101 yearly file and save the combined result.

    Each ``.p`` file in ``self.getDataDir()`` is parsed with
    ``self.parseFilms101YearlyData``.  Every movie it yields is stored as a
    ``[movie, 10]`` pair under the file's base name.  The combined dict is
    written to ``<results dir>/<self.name>.json``.

    Args:
        debug: forwarded to ``self.parseFilms101YearlyData``.
    """
    outdir = self.getDataDir()
    # Looked up once and reused; previously this was assigned but unused and
    # getResultsDir() was called a second time further down.
    resultsdir = self.getResultsDir()
    files = findExt(outdir, ext=".p")

    movies = {}
    for ifile in sorted(files):
        year = getBaseFilename(ifile)
        results = self.parseFilms101YearlyData(ifile, debug=debug)
        movies[year] = [[movie, 10] for movie in results]
        print("Found {0} movies in {1}".format(len(movies[year]), year))

    savename = setFile(resultsdir, "{0}.json".format(self.name))
    print("Saving {0} Years of films101 Data to {1}".format(len(movies), savename))
    saveFile(savename, movies)
def mergeBoxOfficeMojoResults(self, debug=False):
    """Merge the per-year result files into a single ``results.json``.

    Only ``.json`` files in ``self.getResultsDir()`` whose base name parses
    as an integer are merged; anything else (such as a previous
    ``results.json``) is skipped.

    Args:
        debug: print progress information.
    """
    retval = {}
    files = findExt(self.getResultsDir(), ext=".json")
    if debug:
        print("Found {0} files in the results directory".format(
            len(files)))

    for ifile in sorted(files):
        year = getBaseFilename(ifile)
        # Skip files not named after a year.  Only conversion errors are
        # caught so that Ctrl-C and real bugs are not swallowed.
        try:
            int(year)
        except (TypeError, ValueError):
            continue
        data = getFile(ifile)
        retval[year] = data
        if debug:
            print(" Adding {0} entries from {1}".format(len(data), ifile))

    savename = setFile(self.getResultsDir(), "results.json")
    if debug:
        print("Saving", len(retval), "years of movie data to", savename)
    saveFile(savename, retval)
def downloadKWorbSpotifyArtists(self, update=False):
    """Download the kworb Spotify page of every artist on the index page.

    Args:
        update: when ``True``, re-download the artist index page first.

    The index page is read from ``kworb_spotifyartists.p``.  Each table row
    is zipped against the table's header texts; the link in the ``Artist``
    cell gives the artist page.  Artists whose target file already exists
    under ``<artists dir>/data`` are skipped.  Download failures are reported
    and skipped, and there is a one-second pause after each attempt.
    """
    url = "https://kworb.net/spotify/artists.html"
    savename = "kworb_spotifyartists.p"
    if update is True:
        self.dutils.downloadArtistURL(url=url, savename=savename, force=True)
    bsdata = getHTML(savename)

    artistDir = self.disc.getArtistsDir()
    saveDir = setDir(artistDir, "data")
    print(artistDir)

    data = []
    for table in bsdata.findAll("table"):
        ths = [th.text for th in table.findAll("th")]
        for tr in table.findAll("tr")[1:]:   # the first row is skipped
            data.append(dict(zip(ths, tr.findAll("td"))))
    print("Found {0} Spotify Artists".format(len(data)))

    for i, item in enumerate(data):
        link = item["Artist"].find('a')
        artistHref = link.attrs['href']
        name = link.text
        artistFile = setFile(saveDir, "{0}.p".format(getBaseFilename(artistHref)))
        if isFile(artistFile):
            continue

        fullURL = "{0}/{1}".format(self.spotifyURL, artistHref)
        print("{0}/{1}".format(i, len(data)), "\t-\t", artistFile, '\t', fullURL, '\t', name)
        try:
            self.dutils.downloadArtistURL(url=fullURL, savename=artistFile, force=True)
        except Exception:
            # Best effort: report and move on.  Not a bare `except:` so that
            # Ctrl-C can still interrupt the loop.
            print(" ---> Error")
        sleep(1)
def processWikipediaYearlyData(self, procYear=None, debug=False):
    """Parse the saved Wikipedia oscar pages and save one JSON file per year.

    Args:
        procYear: when given, only files matching ``str(procYear)`` are
            processed; otherwise every ``.p`` file in ``self.getDataDir()``.
        debug: print file names and nominees, and enable debug output of the
            fallback parser.

    Each file is parsed with ``self.parseWikipediaOscarData``; if that yields
    nothing, ``self.parseWikipediaOscarDataSpecial`` is tried.  The results
    are written to ``<results dir>/<year>.json``, where ``year`` is the
    file's base name.

    Raises:
        ValueError: if both parsers return no results for a file.
    """
    outdir = self.getDataDir()
    if procYear is None:
        files = findExt(outdir, ext=".p")
    else:
        files = findPatternExt(outdir, pattern=str(procYear), ext=".p")

    for ifile in files:
        if debug:
            print("Processing {0}".format(ifile))
        year = getBaseFilename(ifile)

        # Both parsers receive the file path, so the file is no longer also
        # loaded and HTML-parsed here into variables nobody read.
        results = self.parseWikipediaOscarData(ifile, debug=False)
        if len(results) == 0:
            results = self.parseWikipediaOscarDataSpecial(ifile, debug=debug)
        if len(results) == 0:
            raise ValueError("No results for {0}".format(ifile))

        for k, v in results.items():
            print("====>", year, '\t', k)
            print(" Winner :", v["Winner"])
            if debug:
                print(" Nominees:", v["Nominees"])
            print("")

        savename = setFile(self.getResultsDir(), "{0}.json".format(year))
        print("Saving {0} wikipedia oscar data to {1}".format(
            year, savename))
        saveFile(savename, results)
def findMyMovies(self, debug=False):
    """Index the movie files found on mounted volumes and save the index.

    Globs ``/Volumes/*/Movies/*.*`` and maps each file's base name to its
    full path; on duplicate base names the later path wins.  The mapping is
    written to ``mymovies.json`` in ``self.getDataDir()``.

    Args:
        debug: accepted for interface compatibility; not used here.
    """
    moviePaths = glob("/Volumes/*/Movies/*.*")
    mine = {getBaseFilename(path): path for path in moviePaths}
    print("Found {0} movies on my disks".format(len(moviePaths)))

    target = setFile(self.getDataDir(), "mymovies.json")
    saveFile(idata=mine, ifile=target, debug=True)
def collect(self, hist, test=False, debug=False):
    """Assign players to teams from play-by-play data and save the result.

    Args:
        hist: object providing ``getGamesResultsDir``, ``getSeasonResultsDir``
            and ``getStatisticsResultsDir``.
        test: when ``True``, nothing is written to disk.
        debug: forwarded to ``findExt`` and enables a per-team summary print.

    For each games file whose name starts with a year in 2014-2016, the
    per-position tallies (``self.runners``, ``self.passers``, ...) are reset
    and filled by the ``self.get*`` helpers for every game of every team in
    the season file.  A player is assigned to the team named most often in
    his tally when that team holds at least 75% of the entries and the square
    root of the total is at least 2.  The result is saved as
    ``<year>-stats-extra.json``.

    Raises:
        ValueError: if no year can be read from a file name.
    """
    from math import sqrt

    files = findExt(hist.getGamesResultsDir(), ext=".p", debug=debug)
    for ifile in files:
        print(ifile)
        try:
            year = int(getBaseFilename(ifile).split("-")[0])
        except (AttributeError, TypeError, ValueError) as err:
            raise ValueError("Could not get year from {0}".format(ifile)) from err
        if year not in [2014, 2015, 2016]:
            continue

        yearData = getFile(ifile)
        seasonFilename = setFile(hist.getSeasonResultsDir(), "{0}.p".format(year))
        seasonData = getFile(seasonFilename)

        statsData = {}
        self.runners = {}
        self.passers = {}
        self.punters = {}
        self.kickers = {}
        self.fgkickers = {}

        for teamID, teamData in seasonData.teams.items():
            games = [x["Game"] for x in teamData.games]
            for game in games:
                gameID = game.gameID
                # Games without parsed play-by-play data are skipped.
                try:
                    gameData = yearData[gameID]
                except LookupError:
                    continue

                teamsMetaData = gameData["Teams"]
                homeTeamMetaData = teamsMetaData["Home"]
                awayTeamMetaData = teamsMetaData["Away"]
                driveData = gameData["Plays"]

                # Two-way ID <-> abbreviation map plus Home/Away shortcuts.
                fieldMap = {}
                fieldMap[homeTeamMetaData["ID"]] = homeTeamMetaData["Abbrev"]
                fieldMap[homeTeamMetaData["Abbrev"]] = homeTeamMetaData["ID"]
                fieldMap[awayTeamMetaData["ID"]] = awayTeamMetaData["Abbrev"]
                fieldMap[awayTeamMetaData["Abbrev"]] = awayTeamMetaData["ID"]
                fieldMap["Home"] = homeTeamMetaData["Abbrev"]
                fieldMap["Away"] = awayTeamMetaData["Abbrev"]

                # Maps each team ID to its opponent's ID.
                copMap = {}
                copMap[homeTeamMetaData["ID"]] = awayTeamMetaData["ID"]
                copMap[awayTeamMetaData["ID"]] = homeTeamMetaData["ID"]

                self.getRunners(driveData, fieldMap, debug=False)
                self.getPassers(driveData, fieldMap, debug=False)
                self.getPunters(driveData, fieldMap, debug=False)
                self.getKickers(driveData, copMap, debug=False)
                self.getFieldGoalKickers(driveData, fieldMap, debug=False)

        ###
        ### Now Assign Player To A Team
        ###
        mapping = {
            "Passers": self.passers,
            "Runners": self.runners,
            "Punters": self.punters,
            "Kickers": self.kickers,
            "FGKickers": self.fgkickers
        }
        for position, players in mapping.items():
            for name, passerTeams in players.items():
                mc = passerTeams.most_common(1)[0]
                total = sum(dict(passerTeams).values())
                frac = mc[1] / total
                if frac < 0.75:
                    continue
                sig = sqrt(total)
                if sig < 2:
                    continue
                teamID = mc[0]
                positionData = statsData.setdefault(teamID, {}).setdefault(position, {})
                positionData[name] = [round(frac, 1), round(sig, 1)]

        ## Show team stats
        if debug:
            for teamID, teamStats in statsData.items():
                print(teamID)
                for pos, names in teamStats.items():
                    print('\t', pos, names)

        if test is False:
            augmentedStatsFilename = setFile(
                hist.getStatisticsResultsDir(),
                "{0}-stats-extra.json".format(year))
            saveFile(idata=statsData, ifile=augmentedStatsFilename, debug=True)
def getURL(self):
    """Build the kworb Spotify artist URL for ``self.inputdata``.

    Returns:
        ``artistDBURLClass(url=None, err="NoInput")`` when ``self.inputdata``
        is ``None``; otherwise an ``artistDBURLClass`` whose URL embeds the
        base file name of ``self.inputdata``.
    """
    if self.inputdata is None:
        # Previously this path fell off the end and returned a bare None;
        # return an explicit "NoInput" result so callers always receive an
        # artistDBURLClass instance.
        return artistDBURLClass(url=None, err="NoInput")

    artistURL = "https://kworb.net/spotify/artist/{0}.html".format(
        getBaseFilename(self.inputdata))
    return artistDBURLClass(url=artistURL)
def parseTeamYearlyStandings(self, startYear=2003, endYear=2018, debug=False, verydebug=False):
    """Parse the saved team schedule pages into per-season result files.

    Args:
        startYear: first season to process (inclusive).
        endYear: last season to process (inclusive).
        debug: print a summary for every team.
        verydebug: print per-file / per-game details.

    For each season, every ``.p`` file in ``self.getYearlySeasonDir(year)``
    is parsed.  The team's year and ID come from the ``og:url`` meta tag.
    Each row of every ``Table2__table`` table is read as a game (date,
    opponent, location, result, score) and added to a ``team`` object, which
    is added to the season.  Rows without a parsable date or without result
    spans are skipped.  Files yielding zero games are removed from disk.
    Each season is saved to ``<season results dir>/<year>.p``.

    Raises:
        ValueError: whenever an expected element is missing from the page.
    """
    for year in range(startYear, endYear + 1):
        seasonDir = self.getYearlySeasonDir(year)
        files = findExt(seasonDir, ext=".p", debug=False)
        seasonData = season(year)

        for ifile in files:
            nameyear = getBaseFilename(ifile)
            htmldata = getFile(ifile)
            bsdata = getHTML(htmldata)
            teamName = nameyear.replace("-{0}".format(year), "")

            metadata = bsdata.find("meta", {"property": "og:url"})
            if metadata is None:
                raise ValueError(
                    "Could not find basic team meta data for this file! {0}"
                    .format(ifile))

            # NOTE: as in the original, this rebinds the loop variable
            # `year` to the value read from the page; later files and the
            # save name below use the rebound value.
            try:
                content = metadata.attrs['content']
                year = getBasename(content)
                teamID = getBasename(getDirname(getDirname(content)))
            except Exception as err:
                raise ValueError(
                    "Could not get team year and ID from meta data: {0}".
                    format(metadata)) from err

            if verydebug:
                print(year, '\t', teamID, '\t', ifile)

            ## Create Team Object
            teamData = team(year=year,
                            teamName=teamName,
                            teamMascot=None,
                            teamID=teamID)

            tables = bsdata.findAll("table", {"class": "Table2__table"})
            if verydebug:
                print("\tFound {0} game tables".format(len(tables)))

            for table in tables:
                trs = table.findAll("tr")
                # Second row holds the column headers; games start on the third.
                headers = [
                    x.text for x in trs[1].findAll("td") if x is not None
                ]
                gameRows = trs[2:]
                totalGames = len(gameRows)
                if verydebug:
                    print("\tFound {0} potential games".format(totalGames))

                for ig, tr in enumerate(gameRows):
                    tds = tr.findAll("td")
                    gameData = dict(zip(headers, tds))
                    extra = {"OT": False, "Bowl": False}

                    ## Get the Date
                    try:
                        date = gameData["Date"]
                    except KeyError as err:
                        print(ifile)
                        raise ValueError(
                            "No date for this game! {0}".format(gameData)) from err
                    date = date.text

                    ## Only Keep Games With Regular Dates
                    try:
                        dateval = "{0} {1}".format(
                            date.split(", ")[-1], year)
                        date = getDateTime(dateval)
                    except Exception:
                        date = None
                    if date is None:
                        continue

                    ## Check for January Games (in the following year)
                    if date.month == 1:
                        date = addMonths(date, 12)

                    ## Get the Opponent
                    try:
                        opponent = gameData["Opponent"]
                    except KeyError as err:
                        # Fix: the message used to format the `game` class
                        # instead of this row's data.
                        raise ValueError(
                            "No opponent for this game! {0}".format(gameData)) from err

                    try:
                        oppolink = opponent.find("a")
                        oppohref = oppolink.attrs['href']
                        opponame = getBasename(oppohref)
                        oppoID = getBasename(getDirname(oppohref))
                    except Exception:
                        # Opponent without a link: fall back to the cell text.
                        opponame = opponent.text
                        oppoID = 0

                    try:
                        gamespan = opponent.find("span", {"class": "pr2"})
                        gametype = gamespan.text
                    except Exception as err:
                        raise ValueError(
                            "Could not find game type from {0}".format(
                                opponent)) from err

                    if gametype == "vs":
                        location = teamID
                    elif gametype == "@":
                        location = oppoID
                    else:
                        raise ValueError(
                            "Location --> {0}".format(gametype))

                    if verydebug:
                        print("\t{0}/{1}\t{2}\t{3: <4}{4: <50}".format(
                            ig, totalGames, printDateTime(date), gametype,
                            opponame),
                              end="\t")

                    ## Get the Result
                    try:
                        result = gameData["Result"]
                    except KeyError as err:
                        raise ValueError(
                            "No result for this game! {0}".format(gameData)) from err

                    spans = result.findAll("span")
                    if len(spans) == 0:
                        continue
                    if len(spans) != 2:
                        raise ValueError(
                            "There are {0} spans in this row!: {1}".format(
                                len(spans), result))
                    outcome = spans[0].text.strip()
                    score = spans[1].text.strip()
                    if score.endswith("OT"):
                        extra["OT"] = True  # keep the "Bowl" key intact
                        score = score[:-3].strip()

                    try:
                        scores = [int(x) for x in score.split('-')]
                    except ValueError as err:
                        raise ValueError(
                            "Could not create integer scores from {0}".
                            format(spans)) from err

                    # The first listed score is credited to this team on a
                    # win or tie, and to the opponent on a loss.
                    if outcome == 'W':
                        teamScore = scores[0]
                        oppoScore = scores[1]
                        teamResult = "W"
                        oppoResult = "L"
                    elif outcome == "L":
                        teamScore = scores[1]
                        oppoScore = scores[0]
                        teamResult = "L"
                        oppoResult = "W"
                    elif outcome == "T":
                        teamScore = scores[0]
                        oppoScore = scores[1]
                        teamResult = "T"
                        oppoResult = "T"
                    else:
                        raise ValueError(
                            "Did not recognize game outcome {0}".format(
                                outcome))

                    ## Get the Game
                    try:
                        gamelink = result.find("a")
                        gamehref = gamelink.attrs['href']
                    except Exception as err:
                        raise ValueError(
                            "Could not find href in link! {0}".format(
                                result)) from err
                    # Fix: `gameID` was used below but never assigned (NameError).
                    # NOTE(review): assumes the game ID is the last path
                    # component of the result link — confirm against the
                    # saved pages.
                    gameID = getBasename(gamehref)

                    if verydebug:
                        print("{0} {1}".format(
                            teamResult, "-".join(
                                str(x) for x in [teamScore, oppoScore])))

                    ## Create game object
                    gameData = game(gameID=gameID,
                                    date=date,
                                    teamA=teamID,
                                    teamB=oppoID,
                                    teamAResult=teamResult,
                                    teamBResult=oppoResult,
                                    teamAScore=teamScore,
                                    teamBScore=oppoScore,
                                    location=location)

                    ## Append game to team data
                    teamData.addGame(gameData)

            ## Show Summary
            teamData.setStatistics()
            if debug:
                teamData.summary()
            if teamData.ngames == 0:
                removeFile(ifile, debug=True)

            seasonData.addTeam(teamData)

        savename = setFile(self.getSeasonResultsDir(), "{0}.p".format(year))
        saveFile(idata=seasonData, ifile=savename, debug=True)
def parseGameData(self, startYear=2003, endYear=2018, debug=False, verydebug=False):
    """Parse saved game pages into per-year play-by-play data files.

    Args:
        startYear: first year to process (inclusive).
        endYear: last year to process (inclusive).
        debug: accepted for interface compatibility; not used here.
        verydebug: print detailed parsing information.

    For each year, every ``.p`` file in ``self.getYearlyGamesDir(year)``
    whose base name (the game ID) is not in ``self.noGameData`` is parsed.
    The first ``team-container`` is stored as the away team and the second as
    the home team.  Every ``accordion-item`` drive is stored with its
    headlines, details, scores, possession logos and plays.  The year's
    games are saved to ``<games results dir>/<year>-games.p``.

    Returns:
        dict mapping each year to the list of game IDs for which no
        possession data was found.

    Raises:
        ValueError: if a drive other than the last has no recognised
            headline, or a drive/play lacks an expected element.
    """
    validFGs = [
        "Missed FG", "Field Goal", "FIELD GOAL", "MISSED FG", "Made FG",
        "Field Goal Good", "Field Goal Missed", "Blocked FG"
    ]
    validTDs = [
        "Touchdown", "TOUCHDOWN", "END OF HALF Touchdown", "Downs Touchdown",
        "Missed FG Touchdown", "End of Half Touchdown",
        "End of Game Touchdown", "PUNT Touchdown", "FUMBLE Touchdown",
        "INTERCEPTION Touchdown", "FIELD GOAL Touchdown",
        "MISSED FG Touchdown", "Rushing Touchdown", "Passing Touchdown",
        "Kickoff Return Touchdown", "Interception Return Touch",
        "Turnover on Downs Touchdown", "Field Goal Missed Touchdown",
        "Field Goal Touchdown", "Rushing Touchdown Touchdown",
        "Field Goal Good Touchdown", "Passing Touchdown Touchdown",
        "Fumble Return Touchdown Touchdown", "Rushing TD", "Passing TD",
        "Blocked Punt TD", "Punt Return TD", "Fumble Ret. TD",
        "Interception TD", "Fumble TD", "Rushing TD Touchdown",
        "Blocked Punt TD Touchdown", "Blocked FG (TD)",
        "Punt Return TD Touchdown", "Kick Return TD",
        "Kickoff Return Touchdown Touchdown", "Missed FG (TD) Touchdown",
        "Blocked FG (TD) Touchdown", "Punt Return Touchdown Touchdown",
        "Interception Return Touch Touchdown"
    ]
    validEnds = [
        "End of Half", "End of Game", "END OF HALF", "END OF GAME",
        "End of 4th Quarter"
    ]
    validTOs = [
        "Fumble", "Interception", "FUMBLE", "INTERCEPTION", "Kickoff",
        "KICKOFF", "Blocked Punt"
    ]
    validTOPnts = [
        "Interception Touchdown", "Safety", "Punt Touchdown",
        "Fumble Touchdown", "Punt Return Touchdown",
        "Fumble Return Touchdown", "SAFETY"
    ]
    validDowns = [
        "Punt", "Downs", "PUNT", "Possession (For OT Drives)", "DOWNS",
        "Possession (For OT Drives) Touchdown", "Turnover on Downs",
        "Poss. on downs", "Penalty"
    ]
    validPlay = [
        "Rush", "Pass", "Sack", "Timeout", "Incomplete", "Pass Complete"
    ]
    valid2PT = ["2PT Pass failed", "Missed PAT Return"]
    # NOTE(review): the original also defined ["on-side kick"] but never
    # added it to the accepted headlines; it is left out here to preserve
    # behaviour — confirm whether it should be accepted.
    # Built once (was rebuilt for every drive); a set gives O(1) lookups.
    validHeadlines = frozenset(validFGs + validTDs + validEnds + validTOs +
                               validTOPnts + validDowns + validPlay + valid2PT)

    noData = {}
    for year in range(startYear, endYear + 1):
        yearData = {}
        gamesDir = self.getYearlyGamesDir(year)
        files = findExt(gamesDir, ext=".p", debug=False)
        noData[year] = []

        for i, ifile in enumerate(files):
            gameID = getBaseFilename(ifile)
            if gameID in self.noGameData:
                continue

            htmldata = getFile(ifile)
            bsdata = getHTML(htmldata)

            teamData = bsdata.findAll("div", {"class": "team-container"})

            longNames = [
                x.find("span", {"class": "long-name"}) for x in teamData
            ]
            longNames = [x.text for x in longNames if x is not None]

            shortNames = [
                x.find("span", {"class": "short-name"}) for x in teamData
            ]
            shortNames = [x.text for x in shortNames if x is not None]

            teamAbbrevs = [
                x.find("span", {"class": "abbrev"}) for x in teamData
            ]
            teamAbbrevs = [x.text for x in teamAbbrevs]

            # Team IDs are the digits preceding ".png" in each logo URL.
            teamIDs = [
                x.find("img", {"class": "team-logo"}) for x in teamData
            ]
            teamIDs = [x.attrs for x in teamIDs if x is not None]
            teamIDs = [x['src'] for x in teamIDs]
            teamIDs = [re.search(r"(\d+).png", x) for x in teamIDs]
            teamIDs = [x.groups()[0] for x in teamIDs]

            awayTeam = {
                "Name": longNames[0],
                "Mascot": shortNames[0],
                "Abbrev": teamAbbrevs[0],
                "ID": teamIDs[0]
            }
            homeTeam = {
                "Name": longNames[1],
                "Mascot": shortNames[1],
                "Abbrev": teamAbbrevs[1],
                "ID": teamIDs[1]
            }

            metadata = bsdata.find("meta", {"property": "og:title"})
            title = None
            if metadata is not None:
                title = metadata.attrs['content']
                if verydebug:
                    print("==> {0}".format(title))

            ## Possesions
            posData = bsdata.find("ul", {"class": "css-accordion"})
            if posData is None:
                posData = bsdata.find("article", {"class": "play-by-play"})
            if posData is None:
                noData[year].append(gameID)
                if verydebug:
                    print("Could not find possession data! {0}".format(
                        gameID))
                continue

            gameData = {
                "Teams": {
                    "Away": awayTeam,
                    "Home": homeTeam
                },
                "Plays": []
            }

            if i % 10 == 0:
                print("{0}/{1} with {2} no data games".format(
                    i, len(files), len(noData[year])))

            ###################
            ## Get Full Drive Data
            ###################
            drives = posData.findAll("li", {"class": "accordion-item"})
            if verydebug:
                print("Drives {0}".format(len(drives)))

            for idr, drive in enumerate(drives):
                ## Get Drive Summary
                headlines = [
                    x.text.strip()
                    for x in drive.findAll("span", {"class": "headline"})
                ]
                if verydebug:
                    print("Headlines {0}".format(len(headlines)))

                ## Get Drive Details
                details = [
                    x.text.strip() for x in drive.findAll(
                        "span", {"class": "drive-details"})
                ]
                if verydebug:
                    print("Details {0}".format(len(details)))

                ## Get Home Score
                homescores = drive.findAll("span", {"class": "home"})
                homescores = [
                    x.find("span", {"class": "team-score"})
                    for x in homescores
                ]
                homescores = [x.text for x in homescores if x is not None]
                if verydebug:
                    print("Home Scores {0}".format(len(homescores)))

                ## Get Away Score
                awayscores = drive.findAll("span", {"class": "away"})
                awayscores = [
                    x.find("span", {"class": "team-score"})
                    for x in awayscores
                ]
                awayscores = [x.text for x in awayscores if x is not None]
                if verydebug:
                    print("Away Scores {0}".format(len(awayscores)))

                ## Get Possession
                possessions = drive.findAll("span", {"class": "home-logo"})
                possessions = [
                    x.find("img", {"class": "team-logo"})
                    for x in possessions
                ]
                possessions = [
                    x.attrs['src'] for x in possessions if x is not None
                ]
                possessions = [x.split('&')[0] for x in possessions]
                possessions = [getBaseFilename(x) for x in possessions]
                if verydebug:
                    print("Possessions {0}".format(len(possessions)))

                ## Check for valid headline (parsed correctly?)
                if len(headlines) == 0:
                    continue
                if headlines[0] == '':
                    continue
                hasValidHeadline = any(x in validHeadlines for x in headlines)
                # An unknown headline is tolerated only on the last drive.
                if not hasValidHeadline and idr < len(drives) - 1:
                    print(idr, '/', len(drives))
                    print(title)
                    print(ifile)
                    raise ValueError(
                        "No valid headline in {0}".format(headlines))

                ## Analyze Play-by-Play
                try:
                    driveList = drive.find("ul", {"class": "drive-list"})
                    plays = driveList.findAll("li")
                except AttributeError as err:
                    raise ValueError(
                        "Could not find drive list in drive {0}".format(
                            drive)) from err

                driveData = []
                for ip, play in enumerate(plays):
                    ## Check for Starting Position
                    startPos = play.find("h3")
                    if startPos is None:
                        raise ValueError(
                            "Could not find Starting Position in Play! {0}"
                            .format(play))
                    startData = startPos.text.strip()

                    ## Check for Play Text
                    span = play.find("span", {"class": "post-play"})
                    if span is None:
                        raise ValueError(
                            "Could not find post play data! {0}".format(
                                play))
                    playData = span.text.strip()

                    driveData.append({
                        "Play": ip,
                        "Start": startData,
                        "Data": playData
                    })

                ## Save Drive Data
                gameData["Plays"].append({
                    # Fix: was len(gameData), which is always 2 (the number
                    # of keys in gameData) rather than the drive's index.
                    "Drive": len(gameData["Plays"]),
                    "Headline": headlines,
                    "Detail": details,
                    "HomeScore": homescores,
                    "AwayScore": awayscores,
                    "Possession": possessions,
                    "Data": driveData
                })

                if verydebug:
                    print(idr, '\t', headlines)
                    print(idr, '\t', details)
                    print(idr, '\t', homescores)
                    print(idr, '\t', awayscores)
                    print(idr, '\t', possessions)
                    print("")

            if verydebug:
                print("Found {0} drives for gameID {1}".format(
                    len(gameData["Plays"]), gameID))
            yearData[gameID] = gameData

        print("Parsed {0}/{1} games in {2}".format(len(yearData), len(files),
                                                   year))
        savename = setFile(self.getGamesResultsDir(),
                           "{0}-games.p".format(year))
        saveFile(idata=yearData, ifile=savename, debug=True)

    return noData
def testAlbum(albumDir, artistDir, files):
    """Check an album directory's tags against its directory and file names.

    Args:
        albumDir: path of the album directory (expected below ``artistDir``).
        artistDir: path of the artist directory.
        files: music files of the album.

    Returns:
        dict of boolean flags:
        ``Skip``  - artist or album directory is listed in ``skipDirs()``;
        ``Extra`` - the album path below the artist contains a "/";
        ``Multi`` - the album name matches ``testCD`` or ``testDisc``;
        ``Mix``   - the album name or a track title matches ``testMix``;
        ``Album`` - album tag missing, empty, or not a component of the
                    file's directory path;
        ``Track`` - track-number tag missing or empty, or the track numbers
                    do not sum to 1 + 2 + ... + n;
        ``Title`` - title tag missing or empty, or title and file name do
                    not contain one another.
        The per-file checks stop at the first file that breaks a rule.
    """
    retval = {
        "Track": False,
        "Album": False,
        "Title": False,
        "Multi": False,
        "Skip": False,
        "Extra": False,
        "Mix": False
    }

    artistName = getDirBasics(artistDir)[-1]
    if artistName in skipDirs():
        retval["Skip"] = True

    # Album path relative to the artist directory (leading separator dropped).
    albumName = albumDir.replace(artistDir, "")[1:]
    if "/" in albumName:
        retval["Extra"] = True
        albumDirs = albumName.split("/")
        if albumDirs[0] in skipDirs():
            retval["Skip"] = True
    if albumName in skipDirs():
        retval["Skip"] = True

    print("\t-----> Album Info: {0} / {1} \t ==> {2} Songs".format(
        artistName, albumName, len(files)))

    if retval["Extra"] is True:
        return retval
    if retval["Skip"] is True:
        return retval

    # Collect (file, tag info) for every file not marked to be skipped.
    tagged = []
    for ifile in files:
        results = MusicID(ifile, debug=args.debug)
        if results.skip is True:
            continue
        tagged.append((ifile, results.getInfo()))
    nfiles = len(tagged)

    ## Track Tests
    trackCheckSum = sum(range(1, nfiles + 1))
    trackTrackSum = 0

    ## Album Tests
    retval["Multi"] = testCD(albumName) or testDisc(albumName)
    retval["Mix"] = testMix(albumName)

    for ifile, tag in tagged:
        ###############################################################################################
        ## Album Tests
        ###############################################################################################
        albumTag = tag.get("Album")
        if albumTag is None:
            print("Album Name Error ==> [{0}]".format("No Album Tag"))
            retval["Album"] = True
            break

        try:
            albumName = albumTag[0].replace("/", " ")
        except (AttributeError, IndexError, KeyError, TypeError):
            # Fix: this branch was a copy of the track-number handler; it
            # printed a track error, set `trackNo`, and left a stale
            # `albumName` in place.  Treat it as an empty album name.
            print("Album Name Error ==> [{0}]".format("No Value"))
            albumName = ""

        if len(albumName) == 0:
            retval["Album"] = True
            break

        dirvals = getDirBasics(getDirname(ifile))
        if albumName not in dirvals:
            retval["Album"] = True

        ###############################################################################################
        ## Track Number Tests
        ###############################################################################################
        trkTag = tag.get("TrackNo")
        if trkTag is None:
            print("Track Number Error ==> [{0}]".format("No TrackNo Tag"))
            retval["Track"] = True
            break

        try:
            trackNo = trkTag[0]
        except (IndexError, KeyError, TypeError):
            print("Track Number Error ==> [{0}]".format("No Value"))
            trackNo = ""

        if len(trackNo) == 0:
            retval["Track"] = True
            break

        # Accept "7" as well as "7/12"; anything else contributes nothing to
        # the checksum, which then fails the final comparison.
        trackNumberValue = None
        try:
            trackNumberValue = int(trackNo)
        except (TypeError, ValueError):
            try:
                trackVals = [int(x) for x in trackNo.split("/")]
                trackNumberValue = trackVals[0]
            except (AttributeError, IndexError, TypeError, ValueError):
                print("Track Number Error ==> [{0}]".format(trackNo))

        if trackNumberValue is not None:
            trackTrackSum += trackNumberValue

        ###############################################################################################
        ## Title Number Tests
        ###############################################################################################
        titleTag = tag.get("Title")
        if titleTag is None:
            print("Title Error ==> [{0}]".format("No Title Tag"))
            retval["Title"] = True
            break

        try:
            title = titleTag[0]
        except (IndexError, KeyError, TypeError):
            print("Title Error ==> [{0}]".format(titleTag))
            # Fix: previously only an unused local flag was set, so an empty
            # title tag was never reported in the result.
            retval["Title"] = True
            break

        if testMix(title):
            print("Possible Mix ==> [{0}]".format(title))
            retval["Mix"] = True
            break

        fileName = getBaseFilename(ifile)
        if not any([title in fileName, fileName in title]):
            print("Title Error ==> [{0}] not [{1}]".format(title, fileName))
            retval["Title"] = True
            break

    # Only check the numbering when nothing else was flagged.
    if not any(retval.values()):
        if trackTrackSum != trackCheckSum:
            print("Problem with track numbering...")
            print(" Expected {0} and found {1}".format(
                trackCheckSum, trackTrackSum))
            retval["Track"] = True

    return retval
def parseArtistModValFiles(self, modVal, previousDays=5, force=False, debug=False, doExtra=False):
    """Parse the artist files of one ModVal bucket and store them in its DB file.

    Args:
        modVal: Bucket identifier; used for the sub-directory name and the
            "<modVal>-DB.p" database file name.
        previousDays: Forwarded to ``getArtistModValFiles``.
        force: When True the existing DB is not loaded; parsing starts from
            an empty dict and progress is printed every 100 files.
        debug: Forwarded to the metadata/extra helpers.
        doExtra: When True and at least one file was parsed, also calls
            ``parseArtistModValExtraFiles`` and forces a save.

    Returns:
        int: Number of parsed files, or ``len(dbdata)`` after the extra pass.

    Raises:
        ZeroDivisionError: Deliberately (``1/0``) when the ID stored in a
            parsed file differs from the ID taken from its file name.

    NOTE(review): the source indentation was lost; the nesting below was
    reconstructed from the control flow - please confirm.
    """
    print("-"*100)
    print("Parsing Artist Files For ModVal {0}".format(modVal))
    artistInfo = self.artist

    artistDir = self.disc.getArtistsDir()
    maxModVal = self.disc.getMaxModVal()  # unused in this function
    artistDBDir = self.disc.getArtistsDBDir()

    dirVal = setDir(artistDir, str(modVal))  # unused in this function
    dbname = setFile(artistDBDir, "{0}-DB.p".format(modVal))

    newFiles = self.getArtistModValFiles(modVal, previousDays=previousDays, force=force)
    if force is False:
        # Incremental run: start from the existing DB contents.
        dbdata = getFile(dbname, version=3)
    else:
        print("Forcing Reloads of ModVal={0}".format(modVal))
        print(" Processing {0} files.".format(len(newFiles)))
        dbdata = {}

    saveIt = 0  # number of files parsed in this call
    for j,ifile in enumerate(newFiles):
        if force is True:
            if j % 100 == 0:
                print("\tProcessed {0}/{1} files.".format(j,len(newFiles)))
        artistID = getBaseFilename(ifile)
        isKnown = dbdata.get(artistID)  # unused in this function
        info = artistInfo.getData(ifile)

        if info.ID.ID != artistID:
            print("File: {0}".format(ifile))
            print(info.show())
            # Hard stop on an ID mismatch.
            # NOTE(review): everything below in this branch is unreachable
            # while the 1/0 is present, so self.creditToDownload is never
            # filled from here.
            1/0

            # Check Profile
            try:
                artistName = info.profile.search
                if artistName is not None:
                    self.creditToDownload[artistID] = [artistName,self.getArtistSavename(artistID, credit=True)]
            except:
                pass

            if debug is False:
                continue
            print("ID From Name: {0}".format(artistID))
            print("ID From File: {0}".format(info.ID.ID))

            print("File: {0}".format(ifile))
            print("Info: {0}".format(info.url.get()))
            continue
            #1/0

        saveIt += 1
        dbdata[artistID] = info

    forceSave = False
    if saveIt > 0 and doExtra is True:
        print("\tCalling Extra Parsing")
        dbdata = self.parseArtistModValExtraFiles(modVal, dbdata=dbdata, force=force, debug=debug)
        forceSave = True
        # From here on saveIt is the total DB size, not the new-file count.
        saveIt = len(dbdata)

    if len(self.creditToDownload) > 0:
        print("Found {0} artists that need new downloads.".format(len(self.creditToDownload)))

    if saveIt > 0 or forceSave is True:
        savename = setFile(artistDBDir, "{0}-DB.p".format(modVal))
        print("Saving {0} new artist IDs to {1}".format(saveIt, savename))
        dbNumAlbums = sum([self.getArtistNumAlbums(artistData) for artistData in dbdata.values()])
        print("Saving {0} total artist media".format(dbNumAlbums))
        saveFile(idata=dbdata, ifile=savename)

        # Metadata is regenerated only when the DB was written.
        self.createArtistModValMetadata(modVal=modVal, db=dbdata, debug=debug)
        self.createArtistAlbumModValMetadata(modVal=modVal, db=dbdata, debug=debug)

    return saveIt
def parse(self, modVal, expr, force=False, debug=False):
    """Merge newly found credit files into the DB data of one ModVal.

    Artists that already carry a "Credits" media entry are skipped.  Unknown
    artists are inserted as parsed.  For known artists the parsed media lists
    are merged per media key (keyed by each item's ``code``; parsed items win)
    and missing profile "Tabs" entries are filled in.  The DB is saved only
    when at least one entry was added or merged.

    Args:
        modVal: ModVal bucket to process.
        expr: Forwarded to ``getArtistCreditFiles``.
        force: Forwarded to ``getArtistCreditFiles``.
        debug: Unused here.
    """
    ts = timestat("Parsing ModVal={0} Credit Files".format(modVal))

    tsFiles = timestat("Finding Files To Parse")
    newFiles = self.getArtistCreditFiles(modVal, expr, force)
    tsFiles.stop()

    N = len(newFiles)
    # Progress is reported every `modValue` files.
    modValue = 100
    if N >= 1000:
        modValue = 500

    if N > 0:
        tsDB = timestat("Loading ModVal={0} DB Data".format(modVal))
        ## We do not want to overwrite other data
        dbdata = self.disc.getDBModValData(modVal).to_dict()
        tsDB.stop()

    newData = 0
    newIDs = 0
    tsParse = timestat(
        "Parsing {0} New Credit Files For ModVal={1}".format(N, modVal))
    for fileNo, ifile in enumerate(newFiles, start=1):
        if fileNo % modValue == 0 or fileNo == N:
            progress = "{0}/{1}".format(fileNo, N)
            print("{0: <15}Parsing {1}".format(progress, ifile))

        artistID = getBaseFilename(ifile)

        # Artists that already have credits are left untouched.
        known = dbdata.get(artistID)
        if known is not None and known.media.media.get("Credits") is not None:
            continue

        info = self.artist.getData(ifile)

        # First time we see this artist: store the parsed data as-is.
        if known is None:
            dbdata[artistID] = info
            newData += 1
            newIDs += 1
            continue

        # Merge media per key; for equal codes the parsed item replaces the
        # stored one, stored-only items are kept.
        currentKeys = list(known.media.media.keys())
        for k in list(set(list(info.media.media.keys()) + currentKeys)):
            parsedItems = info.media.media.get(k)
            if parsedItems is None:
                continue
            parsedByCode = {item.code: item for item in parsedItems}
            storedItems = known.media.media.get(k)
            if storedItems is None:
                merged = parsedByCode
            else:
                merged = {item.code: item for item in storedItems}
                merged.update(parsedByCode)
            known.media.media[k] = list(merged.values())
        newData += 1

        # Update the profile tabs if the parsed file knows more of them.
        extra = info.profile.extra
        if isinstance(extra, dict):
            newTabs = extra.get("Tabs", {})
        else:
            newTabs = {}
        currentExtra = known.profile.extra
        if isinstance(currentExtra, dict):
            currentTabs = currentExtra.get("Tabs", {})
        else:
            currentTabs = {}

        if len(currentTabs) == 0 and len(newTabs) > 0:
            known.profile.extra["Tabs"] = newTabs
        elif len(currentTabs) > 0 and len(newTabs) > 0:
            for tab, tabURL in newTabs.items():
                if currentTabs.get(tab) is None:
                    known.profile.extra["Tabs"][tab] = tabURL

    if newData > 0:
        dbdata = Series(dbdata)
        print("Saving {0} Credit Entries".format(newData))
        print("Saving {0} New Entries".format(newIDs))
        ## We do not want to overwrite other data
        self.disc.saveDBModValData(idata=dbdata, modVal=modVal)
    else:
        print("Not Saving Any New Entries")
    tsParse.stop()
def createCreditMetadata(self, modVal=None):
    """Collect title/URL metadata for primary artists that lack credit data.

    For every ModVal the primary files are narrowed in three steps:
      1. drop files whose ID is listed in ``self.creditIgnores``,
      2. keep files whose ID is not in the ModVal DB data,
      3. keep files for which no credit file exists.
    The remaining files are parsed and ``self.metadata[modVal]`` is set to
    ``{artistID: {"title": ..., "url": ...}}``.

    Args:
        modVal: A single ModVal to process, or None to process 0..99.
    """
    modVals = [modVal] if modVal is not None else range(100)

    ts = timestat("Creating AllMusic Credit Metadata")
    for currentModVal in modVals:
        tsFiles = timestat(
            "Finding Primary Files For ModVal={0}".format(currentModVal))
        modValPrimaryFiles = self.getArtistPrimaryFiles(currentModVal,
                                                        expr=None,
                                                        force=True)
        tsFiles.stop()

        # The message used "{0}" for both fields and therefore printed the
        # file count in place of the ModVal.
        tsIgnore = timestat(
            "Removing IDs To Ignore From {0} Primary Files For ModVal={1}".
            format(len(modValPrimaryFiles), currentModVal))
        modValPrimaryGoodFiles = [
            ifile for ifile in modValPrimaryFiles
            if getBaseFilename(ifile) not in self.creditIgnores
        ]
        tsIgnore.stop()

        tsDBData = timestat(
            "Finding Known Artists From {0} Primary/Good Files For ModVal={1}"
            .format(len(modValPrimaryGoodFiles), currentModVal))
        dbData = self.disc.getDBModValData(currentModVal)
        # Scan the ignore-filtered list: the unfiltered list was used here
        # before, so IDs in self.creditIgnores were never actually removed.
        missingArtistIDFiles = [
            ifile for ifile in modValPrimaryGoodFiles
            if dbData.get(getBaseFilename(ifile)) is None
        ]
        tsDBData.stop()

        tsCredit = timestat(
            "Finding Known Credit Artists From {0} Unknown Artists For ModVal={1}"
            .format(len(missingArtistIDFiles), currentModVal))
        creditFiles = {
            getBaseFilename(ifile): ifile
            for ifile in self.dbCredit.getArtistCreditFiles(
                currentModVal, expr=None, force=True)
        }
        missingCreditIDs = [
            ifile for ifile in missingArtistIDFiles
            if creditFiles.get(getBaseFilename(ifile)) is None
        ]
        tsCredit.stop()

        tsMeta = timestat(
            "Finding Metadata For {0}/{1}/{2}/{3} Missing ArtistIDs for ModVal={4}"
            .format(len(missingCreditIDs), len(missingArtistIDFiles),
                    len(modValPrimaryGoodFiles), len(modValPrimaryFiles),
                    currentModVal))
        metaData = {
            getBaseFilename(ifile): self.artist.getData(ifile).meta
            for ifile in missingCreditIDs
        }
        self.metadata[currentModVal] = {
            artistID: {
                "title": meta.title,
                "url": meta.url
            }
            for artistID, meta in metaData.items()
        }
        tsMeta.stop()

    ts.stop()
def createUnofficialMetadata(self, modVal=None):
    """Record, per ModVal, the artists whose unofficial media still needs a file.

    Starting from the DB data of a ModVal, keeps the artists that have an
    'Unofficial' media count, drops those whose name is listed in
    ``self.unofficialIgnores`` and those for which an unofficial file already
    exists, and stores the rest in ``self.metadata[modVal]`` as
    ``{artistID: {"Name": ..., "URL": ..., "MediaCounts": ...}}``.

    Args:
        modVal: A single ModVal to process, or None to process 0..99.
    """
    if modVal is None:
        modValList = range(100)
    else:
        modValList = [modVal]

    ts = timestat("Creating Unofficial Files Metadata")
    for mv in modValList:
        # Step 1: name, URL and unofficial media count of every DB artist.
        tsDBData = timestat(
            "Finding Pages/URL/MediaCounts Data For ModVal={0}".format(mv))
        dbArtistURLMedia = {}
        for artistID, artistData in self.getDBData(mv).items():
            dbArtistURLMedia[artistID] = {
                "Name": artistData.artist.name,
                "URL": artistData.url.url,
                "MediaCounts": artistData.mediaCounts.counts.get('Unofficial')
            }
        tsDBData.stop()

        # Step 2: artists that have an unofficial media count at all.
        tsMedia = timestat(
            "Finding Artists With Unofficial MediaCounts From {0} Artists For ModVal={1}"
            .format(len(dbArtistURLMedia), mv))
        unofficialData = {}
        for artistID, entry in dbArtistURLMedia.items():
            if entry["MediaCounts"] is not None:
                unofficialData[artistID] = entry
        tsMedia.stop()

        # Step 3: drop artists whose name is on the ignore list.
        tsIgnore = timestat(
            "Removing Ignored Artists From {0} Artists For ModVal={1}".
            format(len(unofficialData), mv))
        keptData = {}
        for artistID, entry in unofficialData.items():
            if entry["Name"] not in self.unofficialIgnores:
                keptData[artistID] = entry
        tsIgnore.stop()

        # Step 4: drop artists that already have an unofficial file.
        tsUnofficial = timestat(
            "Finding Known Unofficial Artists From {0} Unofficial Artists For ModVal={1}"
            .format(len(keptData), mv))
        unofficialFiles = {}
        for ifile in self.dbUnofficial.getArtistUnofficialFiles(
                mv, expr=None, force=True):
            unofficialFiles[getBaseFilename(ifile)] = ifile
        missingUnofficialIDs = {}
        for artistID, entry in keptData.items():
            if unofficialFiles.get(artistID) is None:
                missingUnofficialIDs[artistID] = entry
        tsUnofficial.stop()

        tsMeta = timestat(
            "Saving Metadata From {0}/{1}/{2}/{3} Artists For ModVal={4}".
            format(len(missingUnofficialIDs), len(keptData),
                   len(unofficialData), len(dbArtistURLMedia), mv))
        self.metadata[mv] = missingUnofficialIDs
        tsMeta.stop()

    ts.stop()
def parseGames(self, gameID=None, test=False, debug=False, verydebug=False):
    """Parse play-by-play data of stored games into per-drive play summaries.

    For every games-results file of the selected year, each game's drives and
    plays are parsed, then passed through the possession / yardage / kicking
    analysis steps, and finally the reconstructed score is checked against
    the recorded final score.

    Args:
        gameID: When given, only the game with this identifier is parsed.
        test: Unused in this function.
        debug: When True, logs the play count of each accepted game.
        verydebug: When True, turns ``debug`` on as well.

    Raises:
        ValueError: If ``self.hist`` is not set, or a results file name does
            not start with an integer year.

    Side effects:
        Resets ``self.unknownPlays``, ``self.toughParsing``,
        ``self.poorlyParsed``, ``self.statsToGet``, ``self.badGames`` and
        ``self.goodGames``; fills ``self.badGames`` for games whose score
        check fails when no ``gameID`` was requested.

    NOTE(review): the source indentation was lost; the nesting below was
    reconstructed from the control flow - please confirm, in particular the
    score-check branch at the end of the game loop.
    """
    self.logger.info("Parsing Games")
    if self.hist is None:
        raise ValueError("Must set historical class!")

    sep = "======================================================"

    if verydebug:
        debug = True

    self.unknownPlays = []
    # Game IDs that are excluded from parsing (see the skip test below).
    self.toughParsing = ['400547724']
    self.poorlyParsed = [
        '401012731', '400547781', '400547808', '400548070', '400548428',
        '400610207', '400547822'
    ]
    self.poorlyParsed += [
        '400547970', '400547835', '400548026', '400548167', '400547827',
        '400548458'
    ]
    self.poorlyParsed += [
        '400547901', '400547976', '400548246', '400548278', '400548292',
        '400548448'
    ]
    self.statsToGet = {}

    self.badGames = {}
    self.goodGames = {}

    dc = debugclass()

    files = self.hist.getGamesResultsFiles()
    for ifile in files:
        # The year is the part of the file's base name before the first "-".
        try:
            year = int(getBaseFilename(ifile).split("-")[0])
        except:
            raise ValueError("Could not get year from {0}".format(ifile))

        # NOTE(review): only the 2018 season is processed (hard-coded).
        if year != 2018:
            continue

        self.logger.info(" Parsing Games from {0}".format(year))
        yearData = getFile(ifile)
        seasonData = self.hist.getSeasonResultsData(year)
        statsData = self.hist.getStatisticsResultsData(year)
        augmentedStatsData = self.hist.getStatisticsAugmentedData(year)

        totalGames = 0
        for gameIdent, gameData in yearData.items():
            if gameID is not None:
                if gameID != gameIdent:
                    continue

            # Skip games without data and games on the exclusion lists.
            if gameIdent in self.hist.noGameData or gameIdent in self.poorlyParsed or gameIdent in self.toughParsing:
                continue

            self.logger.info(" Parsing Game ID {0}".format(gameIdent))
            teamsMetaData = gameData["Teams"]
            homeTeamMetaData = teamsMetaData["Home"]
            awayTeamMetaData = teamsMetaData["Away"]
            driveData = gameData["Plays"]

            ################################################################################
            ### Get maps
            ################################################################################
            fieldMap = self.makeFieldMap(awayTeamMetaData, homeTeamMetaData)
            copmap = self.makeCopMap(awayTeamMetaData, homeTeamMetaData)

            ################################################################################
            ### Get team data
            ################################################################################
            # A game is skipped when either team's game data cannot be found.
            homeTeamGameData = self.getTeamGameData(
                gameIdent, seasonData, homeTeamMetaData)
            if homeTeamGameData is None:
                continue
            awayTeamGameData = self.getTeamGameData(
                gameIdent, seasonData, awayTeamMetaData)
            if awayTeamGameData is None:
                continue

            ################################################################################
            ### Learn key plays for use in determining possession
            ################################################################################
            players = gameplayers(teamsMap=fieldMap, statsData=statsData)
            players.augmentData(augmentedStatsData)
            pfp = possessionfromplayer(players)

            # Per-game parser / analyzer helpers.
            ps = playstart()
            pc = playclock()
            pt = playtype()
            ap = analyzepossession(copmap, players)
            ay = analyzeyards()
            ak = analyzekicking()
            apen = analyzepenalties()  # only used by the disabled penalty steps below
            pcc = possessionchangeclass(copmap)

            gameResult = []
            totalPlays = 0

            ################################################################################
            ### Collect Post Drive Scores
            ################################################################################
            # "Final" is stored as [away score, home score].
            postDriveScores = {"Drives": [], "Final": []}
            postDriveScores["Final"] = [
                awayTeamGameData.teamAScore, homeTeamGameData.teamAScore
            ]

            ################################################################################
            ### Iterate over drives
            ################################################################################
            for idr, drive in enumerate(driveData):
                self.logger.debug("\n\n{0}".format(2 * sep))
                self.logger.debug("{0}Parsing Plays for Drive {1}".format(
                    self.ind, idr))

                ds = drivesummary(drive, fieldMap)
                drivePlays = ds.getDrivePlays()
                postDriveScores["Drives"].append(ds.getPostDriveScore())

                ################################################################################
                ### Iterate over plays in drive
                ################################################################################
                driveResults = []
                for ipl, drivePlay in enumerate(drivePlays):
                    #playNo = drivePlay['Play']
                    playStartText = drivePlay['Start']
                    playData = drivePlay['Data']

                    self.logger.debug("\n {0}".format(sep))
                    self.logger.debug("{0} Play {1}/{2}: {3}".format(
                        self.ind, ipl, len(drivePlays), playData))

                    ### Determine play starting position
                    startVals = ps.getStart(playStartText)

                    ### Determine play clock and quarter
                    pc.parsePlay(playData)
                    startVals.setClock(pc)
                    playText = pc.getPlayText()

                    ### Determine play type
                    play = pt.getPlay(playText)

                    ### Determine possession
                    playPossession = pfp.determinePossession(play)
                    if playPossession.isForced() is not None:
                        play.pa.forced = playPossession.isForced()

                    ### Result of play
                    playResult = playsummary(possession=playPossession,
                                             start=startVals,
                                             play=play,
                                             valid=play.valid)

                    ### Save and move on
                    totalPlays += 1
                    driveResults.append(playResult)
                    continue

                    ### Check if we need to insert a play
                    #newPlay = self.addPlay(gameIdent, idr, ipl, playResult, possData, verydebug)
                    #if newPlay is not None:
                    #    totalPlays += 1
                    #    driveResults.append(newPlay)

                fullDriveData = ds.getFullDrive()
                fullDriveData.setPlays(driveResults)
                #fullDriveData = self.insertMissingData(gameIdent, idr, fullDriveData, debug)
                #fullDriveData = self.augmentPlayWithScore(fullDriveData, fieldMap, debug)
                gameResult.append(fullDriveData)

            ################################################################################
            ### Show State Before Alterations
            ################################################################################
            self.logger.debug("\n{0}".format(2 * sep))
            self.logger.debug("{0}Found {1} Drives For This Game".format(
                self.ind, len(gameResult)))
            self.logger.debug("\n{0}\n".format(2 * sep))

            ################################################################################
            ### Analyze Possession
            ################################################################################
            # Each step takes the game result and returns the updated one;
            # the steps are applied in this exact order.
            dc.showGame(gameResult)
            gameResult = ap.continuity(gameResult)
            gameResult = pcc.splitChangeOfPossession(gameResult)
            gameResult = ap.continuity(gameResult)
            gameResult = ap.returns(gameResult)
            gameResult = ap.pats(gameResult)
            gameResult = ap.endofgame(gameResult, postDriveScores)
            gameResult = ap.noplays(gameResult)
            gameResult = ap.nextplay(gameResult)
            gameResult = ap.endofdrive(gameResult)
            dc.showGame(gameResult, "Game")

            gameResult = ay.analyze(gameResult)
            gameResult = ak.kickoffs(gameResult)
            gameResult = ak.returns(gameResult)
            #gameResult = apen.penalties(gameResult)
            #gameResult = apen.isPenaltyAdditive(gameResult)

            # Compare the reconstructed score with the recorded final score.
            scoreResult = ap.gamescore(gameResult, postDriveScores)
            if scoreResult is False:
                if gameID is None:
                    # Bulk run: remember the bad game and move on.
                    self.badGames[gameIdent] = True
                    continue
                # Single-game run: show the game and fall through.
                dc.showGame(gameResult, "Score Is Not Corrent")

            dc.showGame(gameResult, "Good Game")
            totalGames += 1

            if debug:
                # NOTE(review): logs the requested gameID (None in bulk
                # runs), not gameIdent - confirm this is intended.
                self.logger.info("Found {0} plays in this game {1}".format(
                    totalPlays, gameID))

        self.logger.info("Found {0} total games for {1}".format(
            totalGames, ifile))
def getMedia(self):
    """Extract the video entries from every table of the parsed page.

    The first ``<td>`` of each table row (the first row is skipped as the
    header) is expected to hold a link plus a text of the form
    ``"<artist> - <track>"``.  Known decorations such as
    "(Official Video)" are stripped from the track name and runs of
    whitespace are collapsed before the entry is stored.

    Rows without a cell, without a link target, or whose track name is empty
    after cleaning are skipped.

    Returns:
        artistDBMediaClass: Object whose ``media["Videos"]`` list holds one
        ``artistDBMediaDataClass`` per accepted row.
    """
    amc = artistDBMediaClass()
    mediaType = "Videos"
    amc.media[mediaType] = []

    # Decorations removed from track names; built once instead of per row.
    removes = [
        "(Official Music Video)", "(Official Lyric Video)",
        "(Official Video (Short Version))", "(Official Video)",
        "[Lyric Video]", "(Video Version)", "[Official Music Video]",
        "(Official Audio)", "(Shazam Version)", "(Explicit)",
        "(Dance Video)", "(Lyric Video)", "[Official Video]",
        "(Official Dance Video)", '(Acoustic)', '(Audio)', '(Visualizer)',
        '(Video Commentary)', '(VEVO Footnotes)', '(Choir Version)',
        '(Fan Lip Sync Version)', '(Trailer)', '(Teaser)'
    ]

    for table in self.bsdata.findAll("table"):
        for tr in table.findAll("tr")[1:]:
            td = tr.find('td')
            if td is None:
                # Row without data cells (e.g. a second header row).
                continue
            ref = td.find("a")
            url = ref.attrs.get('href') if ref is not None else None
            if url is None:
                # Without a link there is no video ID to build the URL from.
                continue
            name = td.text

            #https://kworb.net/youtube/video/fRh_vgS2dFE.html
            trackURL = "https://kworb.net/youtube/video/{0}.html".format(
                getBaseFilename(url))

            songData = name.split(' - ')
            artistName = songData[0]
            trackName = " - ".join(songData[1:])

            # Remove each decoration, then trim and collapse whitespace
            # runs in one pass.  This replaces a find/replace loop that
            # could not terminate for multi-word titles.
            for rmText in removes:
                trackName = " ".join(trackName.replace(rmText, "").split())

            if len(trackName) == 0:
                continue

            code = self.dbUtils.getAlbumCode(name=trackName, url=trackURL)
            amdc = artistDBMediaDataClass(album=trackName,
                                          url=trackURL,
                                          aclass=None,
                                          aformat=None,
                                          artist=artistName,
                                          code=code,
                                          year=None)
            if amc.media.get(mediaType) is None:
                amc.media[mediaType] = []
            amc.media[mediaType].append(amdc)

    return amc
def genMIDTags(albumDir, artistDir, files, args):
    """Print ``mid`` commands that would tag an album's files from their names.

    For each file that ``MusicID`` does not skip, a track number and a title
    are guessed from the file's base name.  For every file with at least one
    suggestion a line ``mid -f "<file>" -<tag> "<value>" ...`` is printed.

    Args:
        albumDir: Path of the album directory (must start with ``artistDir``).
        artistDir: Path of the artist directory.
        files: Iterable of music file paths.
        args: Options object; reads ``debug``, ``ignoretitle`` and
            ``tryalbum``.

    Returns:
        None.  All output goes to stdout.
    """
    artistName = getDirBasics(artistDir)[-1]
    albumName = albumDir.replace(artistDir, "")[1:]

    print("\t-----> Album Info: {0} / {1} \t ==> {2} Songs".format(
        artistName, albumName, len(files)))

    # Keep only the files MusicID accepts; their tags are read but not used.
    acceptedFiles = []
    tags = []
    for candidate in files:
        results = MusicID(candidate, debug=args.debug)
        if results.skip is True:
            continue
        tags.append(results.getInfo())
        acceptedFiles.append(candidate)

    fixVals = {}
    for ifile in acceptedFiles:
        remainder = getBaseFilename(ifile)

        # Guess the track number first; the title guess then works on the
        # name with the track number part removed.
        guessedTrack = None
        best = getBestVal(guessTrackNumber(remainder))
        if best is not None and best.get('Val') is not None:
            guessedTrack = best["Val"]
            remainder = stripName(best["Rep"])

        guessedTitle = None
        best = getBestVal(guessTitle(remainder))
        if best is not None and best.get('Val') is not None:
            guessedTitle = best["Val"]
            remainder = stripName(best["Rep"])

        if guessedTrack is not None:
            fixVals.setdefault(ifile, {})["track"] = guessedTrack

        if args.ignoretitle is False and guessedTitle is not None:
            fixVals.setdefault(ifile, {})["title"] = guessedTitle

        if args.tryalbum:
            fixVals.setdefault(ifile, {})["album"] = albumName

    hasFixes = len(fixVals) > 0
    if hasFixes:
        print("")
    for ifile, fixes in fixVals.items():
        pieces = ["mid -f \"{0}\" ".format(ifile)]
        for tag, val in fixes.items():
            pieces.append(" -{0} \"{1}\"".format(tag, val))
        # One command per file, followed by an empty line.
        print("".join(pieces) + "\n")
    if hasFixes:
        print("")
def parse(self, modVal, expr='< 0 Days', force=True, debug=False):
    """Merge the "extra" artist files of one ModVal into its DB data.

    Extra files are expected to be named ``<artistID>-<suffix>``; files whose
    base name does not split into exactly two parts on "-" are reported and
    skipped.  Unknown artists are inserted as parsed; for known artists the
    parsed media lists are merged per media key (keyed by each item's
    ``code``; parsed items win).

    Args:
        modVal: ModVal bucket to process.
        expr: Forwarded to ``getArtistExtraFiles``.
        force: Forwarded to ``getArtistExtraFiles`` and ``getDBData``.
        debug: Unused here.

    Returns:
        bool: True when at least one entry was added or merged (and saved).
    """
    ts = timestat("Parsing ModVal={0} Extra Files".format(modVal))

    tsFiles = timestat("Finding Files To Parse")
    newFiles = self.getArtistExtraFiles(modVal, expr, force=force)
    tsFiles.stop()

    N = len(newFiles)
    # Progress is reported every `modValue` files.
    modValue = 10
    if N >= 100:
        modValue = 50

    if N > 0:
        tsDB = timestat("Loading ModVal={0} DB Data".format(modVal))
        dbdata = self.getDBData(modVal, force)
        tsDB.stop()

    newData = 0
    tsParse = timestat("Parsing {0} New Extra Files For ModVal={1}".format(N, modVal))
    for fileNo, ifile in enumerate(newFiles, start=1):
        if fileNo % modValue == 0 or fileNo == N:
            progress = "{0}/{1}".format(fileNo, N)
            print("{0: <15}Parsing {1}".format(progress, ifile))

        # The artist ID is the part of the base name before the single "-".
        nameParts = getBaseFilename(ifile).split("-")
        if len(nameParts) != 2:
            print("Error with extra file: {0}".format(ifile))
            continue
        artistID = nameParts[0]

        info = self.artist.getData(ifile)

        known = dbdata.get(artistID)
        if known is None:
            # First data for this artist: store the parsed data as-is.
            dbdata[artistID] = info
            newData += 1
            continue

        # Merge media per key; for equal codes the parsed item replaces the
        # stored one, stored-only items are kept.
        currentKeys = list(known.media.media.keys())
        for k in list(set(list(info.media.media.keys()) + currentKeys)):
            parsedItems = info.media.media.get(k)
            if parsedItems is None:
                continue
            parsedByCode = {item.code: item for item in parsedItems}
            storedItems = known.media.media.get(k)
            if storedItems is None:
                merged = parsedByCode
            else:
                merged = {item.code: item for item in storedItems}
                merged.update(parsedByCode)
            known.media.media[k] = list(merged.values())
        newData += 1
    tsParse.stop()

    changed = newData > 0
    if changed:
        self.saveDBData(modVal, dbdata, newData)
    return changed